tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

fetch.py (4378B)


      1 #!/usr/bin/env python3
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at https://mozilla.org/MPL/2.0/.
      5 
      6 import pathlib
      7 
      8 import click
      9 
     10 from qm_try_analysis import telemetry, utils
     11 from qm_try_analysis.logging import info
     12 
     13 """
     14 The analysis is based on the following query:
     15 https://sql.telemetry.mozilla.org/queries/78691/source?p_day=28&p_month=03&p_year=2021
     16 
     17 SELECT UNIX_MILLIS(timestamp) AS submit_timeabs,
     18       session_start_time,
     19       submission_date,
     20       build_id,
     21       client_id,
     22       session_id,
     23       event_timestamp,
     24       CAST(mozfun.map.get_key(event_map_values, "seq") AS INT64) AS seq,
     25       mozfun.map.get_key(event_map_values, "context") AS context,
     26       mozfun.map.get_key(event_map_values, "source_file") AS source_file,
     27       mozfun.map.get_key(event_map_values, "source_line") AS source_line,
     28       mozfun.map.get_key(event_map_values, "severity") AS severity,
     29       mozfun.map.get_key(event_map_values, "result") AS result,
     30 FROM telemetry.events
     31 WHERE submission_date >= CAST('{{ year }}-{{ month }}-{{ day }}' AS DATE)
     32  AND event_category='dom.quota.try'
     33  AND build_id >= '{{ build }}'
     34  AND UNIX_MILLIS(timestamp) > {{ last }}
     35 ORDER BY submit_timeabs
     36 LIMIT 600000
     37 
     38 We fetch events in chronological order, as we want to keep track of where we already
     39 arrived with our analysis. To accomplish this we write our runs into qmexecutions.json.
     40 
     41 [
     42    {
     43        "workdir": ".",
     44        "daysback": 1,
     45        "numrows": 17377,
     46        "lasteventtime": 1617303855145,
     47        "rawfile": "./qmrows_until_1617303855145.json"
     48    }
     49 ]
     50 
     51 lasteventtime is the highest value of submit_timeabs we found in our data.
     52 
     53 analyze_qm_failures instead needs the rows to be ordered by
     54 client_id, session_id, thread_id, submit_timeabs, seq
     55 Thus we sort the rows accordingly before writing them.
     56 """
     57 
     58 
     59 @click.command()
     60 @click.option(
     61    "-k",
     62    "--key",
     63    required=True,
     64    help="Your personal telemetry API key.",
     65 )
     66 @click.option(
     67    "-b",
     68    "--minbuild",
     69    default="20210329000000",
     70    help="The lowest build id we will fetch data for. This should have the following format: `yyyymmddhhmmss`.",
     71 )
     72 @click.option("-d", "--days", type=int, default=7, help="Number of days to go back.")
     73 @click.option(
     74    "-l",
     75    "--lasteventtime",
     76    type=int,
     77    default=0,
     78    help="Fetch only events after this number of Unix milliseconds.",
     79 )
     80 @click.option(
     81    "-w",
     82    "--workdir",
     83    type=click.Path(file_okay=False, writable=True, path_type=pathlib.Path),
     84    default="output",
     85    help="Working directory",
     86 )
     87 def fetch_qm_failures(key, minbuild, days, lasteventtime, workdir):
     88    """
     89    Invokes the query 78691 and stores the result in a JSON file.
     90    """
     91    # Creeate output dir if it does not exist
     92    workdir.mkdir(exist_ok=True)
     93 
     94    start = utils.dateback(days)
     95    year, month, day = start.year, start.month, start.day
     96 
     97    run = {}
     98    lastrun = utils.getLastRunFromExecutionFile(workdir)
     99    if "lasteventtime" in lastrun:
    100        lasteventtime = lastrun["lasteventtime"]
    101 
    102    run["workdir"] = workdir.as_posix()
    103    run["daysback"] = days
    104    run["minbuild"] = minbuild
    105 
    106    p_params = f"p_year={year:04d}&p_month={month:02d}&p_day={day:02d}&p_build={minbuild}&p_last={lasteventtime}"
    107 
    108    # Read string at the start of the file for more information on query 78691
    109    result = telemetry.query(key, 78691, p_params)
    110    rows = result["query_result"]["data"]["rows"]
    111    run["numrows"] = len(rows)
    112 
    113    if run["numrows"] > 0:
    114        lasteventtime = telemetry.getLastEventTimeAbs(rows)
    115        run["lasteventtime"] = lasteventtime
    116        rows.sort(
    117            key=lambda row: "{}.{}.{}.{}.{:06d}".format(
    118                row["client_id"],
    119                row["session_id"],
    120                row["seq"] >> 32,  # thread_id
    121                row["submit_timeabs"],
    122                row["seq"] & 0x00000000FFFFFFFF,  # seq,
    123            ),
    124            reverse=False,
    125        )
    126        outfile = f"{workdir}/qmrows_until_{lasteventtime}.json"
    127        utils.writeJSONFile(outfile, rows)
    128        run["rawfile"] = outfile
    129    else:
    130        info("No results found, maybe next time.")
    131        run["lasteventtime"] = lasteventtime
    132 
    133    utils.addNewRunToExecutionFile(workdir, run)
    134 
    135 
# Run the click command only when this file is executed as a script; click
# parses the command-line options and passes them to fetch_qm_failures.
if __name__ == "__main__":
    fetch_qm_failures()