tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

util.py (14719B)


      1 # This Source Code Form is subject to the terms of the Mozilla Public
      2 # License, v. 2.0. If a copy of the MPL was not distributed with this
      3 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      4 #
      5 # This module needs to stay Python 2 and 3 compatible
      6 #
import contextlib
import os
import platform
import shlex
import shutil
import sys
import tempfile
import time
from subprocess import PIPE, Popen

import mozlog
import requests
import yaml
from requests.exceptions import ConnectionError
from requests.packages.urllib3.util.retry import Retry

from condprof import progress
     23 
# True when running under CI automation (Taskcluster exports MOZ_AUTOMATION).
TASK_CLUSTER = "MOZ_AUTOMATION" in os.environ.keys()
# Timeout in seconds applied to the HTTP requests made by this module.
DOWNLOAD_TIMEOUT = 30
     26 
     27 
class ArchiveNotFound(Exception):
    """Raised by download_file() when the requested URL is not reachable."""

    pass
     30 
     31 
# Prefs applied on top of every customization's prefs in fresh_profile();
# they enable Marionette/devtools remote control needed by the runner.
DEFAULT_PREFS = {
    "focusmanager.testmode": True,
    "marionette.defaultPrefs.port": 2828,
    "marionette.port": 2828,
    "remote.log.level": "Trace",
    "marionette.log.truncate": False,
    "extensions.autoDisableScopes": 10,
    "devtools.debugger.remote-enabled": True,
    "devtools.console.stdout.content": True,
    "devtools.console.stdout.chrome": True,
}
     43 
# Fallback customization file shipped next to this module.
DEFAULT_CUSTOMIZATION = os.path.join(
    os.path.dirname(__file__), "customization", "default.json"
)
# Width of the padding structlog's ConsoleRenderer adds after key names;
# used by BridgeLogger._find() to recognize Arsenic log records.
STRUCTLOG_PAD_SIZE = 20
     48 
     49 
     50 class BridgeLogger:
     51    def __init__(self, logger):
     52        self.logger = logger
     53 
     54    def _find(self, text, *names):
     55        # structlog's ConsoleRenderer pads values
     56        for name in names:
     57            if name + " " * STRUCTLOG_PAD_SIZE in text:
     58                return True
     59        return False
     60 
     61    def _convert(self, message):
     62        return obfuscate(message)[1]
     63 
     64    def info(self, message, *args, **kw):
     65        if not isinstance(message, str):
     66            message = str(message)
     67        # converting Arsenic request/response struct log
     68        if self._find(message, "request", "response"):
     69            self.logger.debug(self._convert(message), *args, **kw)
     70        else:
     71            self.logger.info(self._convert(message), *args, **kw)
     72 
     73    def error(self, message, *args, **kw):
     74        self.logger.error(self._convert(message), *args, **kw)
     75 
     76    def warning(self, message, *args, **kw):
     77        self.logger.warning(self._convert(message), *args, **kw)
     78 
     79 
     80 logger = None
     81 
     82 
     83 def get_logger():
     84    global logger
     85    if logger is not None:
     86        return logger
     87    new_logger = mozlog.get_default_logger("condprof")
     88    if new_logger is None:
     89        new_logger = mozlog.unstructured.getLogger("condprof")
     90 
     91    # wrap the logger into the BridgeLogger
     92    new_logger = BridgeLogger(new_logger)
     93 
     94    try:
     95        # bridge for Arsenic
     96        from arsenic import connection
     97        from structlog import wrap_logger
     98 
     99        connection.log = wrap_logger(new_logger)
    100    except ImportError:
    101        # Arsenic is not installed for client-only usage
    102        pass
    103    logger = new_logger
    104    return logger
    105 
    106 
# Initialize the module-level `logger` right away so every function
# below can use it at call time.
get_logger()
    109 
    110 
    111 def fresh_profile(profile, customization_data):
    112    from mozprofile import create_profile  # NOQA
    113 
    114    # XXX on android we mgiht need to run it on the device?
    115    logger.info("Creating a fresh profile")
    116    new_profile = create_profile(app="firefox")
    117    prefs = customization_data["prefs"]
    118    prefs.update(DEFAULT_PREFS)
    119    logger.info("Setting prefs %s" % str(prefs.items()))
    120    new_profile.set_preferences(prefs)
    121    extensions = []
    122    for name, url in customization_data["addons"].items():
    123        logger.info("Downloading addon %s" % name)
    124        # When running on the CI, we expect the xpi files to have been
    125        # fetched by the firefox-addons fetch task dependency (see
    126        # taskcluster/kinds/fetch/browsertime.yml) and the condprof-addons
    127        # linter enforces the content of the archive to be unpacked into
    128        # a subdirectory named "firefox-addons".
    129        extension = download_file(url, mozfetches_subdir="firefox-addons")
    130        extensions.append(extension)
    131    logger.info("Installing addons")
    132    new_profile.addons.install(extensions)
    133    shutil.copytree(new_profile.profile, profile)
    134    return profile
    135 
    136 
    137 link = "https://ftp.mozilla.org/pub/firefox/nightly/latest-mozilla-central/"
    138 
    139 
    140 def get_firefox_download_link():
    141    try:
    142        from bs4 import BeautifulSoup
    143    except ImportError:
    144        raise ImportError("You need to run pip install beautifulsoup4")
    145    if platform.system() == "Darwin":
    146        extension = ".dmg"
    147    elif platform.system() == "Linux":
    148        arch = platform.machine()
    149        extension = ".linux-%s.tar.xz" % arch
    150    else:
    151        raise NotImplementedError(platform.system())
    152 
    153    page = requests.get(link).text
    154    soup = BeautifulSoup(page, "html.parser")
    155    for node in soup.find_all("a", href=True):
    156        href = node["href"]
    157        if href.endswith(extension):
    158            return "https://ftp.mozilla.org" + href
    159 
    160    raise Exception()
    161 
    162 
    163 def check_exists(archive, server=None, all_types=False):
    164    if server is not None:
    165        archive = server + "/" + archive
    166    try:
    167        logger.info("Getting headers at %s" % archive)
    168        resp = requests.head(archive, timeout=DOWNLOAD_TIMEOUT)
    169    except ConnectionError:
    170        return False, {}
    171 
    172    if resp.status_code in (302, 303):
    173        logger.info("Redirected")
    174        return check_exists(resp.headers["Location"])
    175 
    176    # see Bug 1574854
    177    if (
    178        not all_types
    179        and resp.status_code == 200
    180        and "text/html" in resp.headers["Content-Type"]
    181    ):
    182        logger.info("Got an html page back")
    183        exists = False
    184    else:
    185        logger.info("Response code is %d" % resp.status_code)
    186        exists = resp.status_code
    187 
    188    return exists, resp.headers
    189 
    190 
    191 def check_mozfetches_dir(target, mozfetches_subdir):
    192    logger.info("Checking for existence of: %s in MOZ_FETCHES_DIR" % target)
    193    fetches = os.environ.get("MOZ_FETCHES_DIR")
    194    if fetches is None:
    195        return None
    196    fetches_target = os.path.join(fetches, mozfetches_subdir, target)
    197    if not os.path.exists(fetches_target):
    198        return None
    199    logger.info("Already fetched and available in MOZ_FETCHES_DIR: %s" % fetches_target)
    200    return fetches_target
    201 
    202 
    203 def download_file(url, target=None, mozfetches_subdir=None):
    204    if target is None:
    205        target = url.split("/")[-1]
    206 
    207    # check if the assets has been fetched through a taskgraph fetch task dependency
    208    # and already available in the MOZ_FETCHES_DIR passed as an additional parameter.
    209    if mozfetches_subdir is not None:
    210        filepath = check_mozfetches_dir(target, mozfetches_subdir)
    211        if filepath is not None:
    212            return filepath
    213 
    214    present, headers = check_exists(url)
    215    if not present:
    216        logger.info("Cannot find %r" % url)
    217        raise ArchiveNotFound(url)
    218 
    219    etag = headers.get("ETag")
    220 
    221    logger.info("Checking for existence of: %s" % target)
    222    if os.path.exists(target):
    223        # XXX for now, reusing downloads without checking them
    224        # when we don't have an .etag file
    225        if etag is None or not os.path.exists(target + ".etag"):
    226            logger.info("No existing etag downloads.")
    227            return target
    228        with open(target + ".etag") as f:
    229            current_etag = f.read()
    230        if etag == current_etag:
    231            logger.info("Already Downloaded.")
    232            # should at least check the size?
    233            return target
    234        else:
    235            logger.info("Changed!")
    236    else:
    237        logger.info("Could not find an existing archive.")
    238        # Add some debugging logs for the directory content
    239        try:
    240            archivedir = os.path.dirname(target)
    241            logger.info(
    242                "Content in cache directory %s: %s"
    243                % (archivedir, os.listdir(archivedir))
    244            )
    245        except Exception:
    246            logger.info("Failed to list cache directory contents")
    247 
    248    logger.info("Downloading %s" % url)
    249    req = requests.get(url, stream=True, timeout=DOWNLOAD_TIMEOUT)
    250    total_length = int(req.headers.get("content-length"))
    251    target_dir = os.path.dirname(target)
    252    if target_dir != "" and not os.path.exists(target_dir):
    253        logger.info("Creating dir %s" % target_dir)
    254        os.makedirs(target_dir)
    255 
    256    with open(target, "wb") as f:
    257        if TASK_CLUSTER:
    258            for chunk in req.iter_content(chunk_size=1024):
    259                if chunk:
    260                    f.write(chunk)
    261                    f.flush()
    262        else:
    263            iter = req.iter_content(chunk_size=1024)
    264            # pylint --py3k W1619
    265            size = total_length / 1024 + 1
    266            for chunk in progress.bar(iter, expected_size=size):
    267                if chunk:
    268                    f.write(chunk)
    269                    f.flush()
    270 
    271    if etag is not None:
    272        with open(target + ".etag", "w") as f:
    273            f.write(etag)
    274 
    275    return target
    276 
    277 
    278 def extract_from_dmg(dmg, target):
    279    mount = tempfile.mkdtemp()
    280    cmd = "hdiutil attach -nobrowse -mountpoint %s %s"
    281    os.system(cmd % (mount, dmg))
    282    try:
    283        found = False
    284        for f in os.listdir(mount):
    285            if not f.endswith(".app"):
    286                continue
    287            app = os.path.join(mount, f)
    288            shutil.copytree(app, target)
    289            found = True
    290            break
    291    finally:
    292        os.system("hdiutil detach " + mount)
    293        shutil.rmtree(mount)
    294    if not found:
    295        raise OSError("No app file found in %s" % dmg)
    296 
    297 
@contextlib.contextmanager
def latest_nightly(binary=None):
    """Context manager yielding a path to a Firefox binary.

    When `binary` is None, the latest nightly is downloaded and made
    runnable (DMG mounted on macOS, tarball extracted on Linux in the
    current directory) and cleaned up on exit; otherwise the provided
    binary is yielded untouched and nothing is cleaned up.
    """
    if binary is None:
        # we want to use the latest nightly
        nightly_archive = get_firefox_download_link()
        logger.info("Downloading %s" % nightly_archive)
        target = download_file(nightly_archive)
        # on macOs we just mount the DMG
        # XXX replace with extract_from_dmg
        if platform.system() == "Darwin":
            cmd = "hdiutil attach -mountpoint /Volumes/Nightly %s"
            os.system(cmd % target)
            binary = "/Volumes/Nightly/Firefox Nightly.app/Contents/MacOS/firefox"
        # on linux we unpack it
        elif platform.system() == "Linux":
            # Tar should automatically recognize the compression algo (xz/bzip2)
            cmd = "tar -xvf %s" % target
            os.system(cmd)
            binary = "firefox/firefox"

        # NOTE(review): `mounted` is True on Linux too, where nothing is
        # mounted — it really means "we own the unpacked/mounted copy
        # and must clean it up on exit".
        mounted = True
    else:
        mounted = False
    try:
        yield binary
    finally:
        # XXX replace with extract_from_dmg
        if mounted:
            if platform.system() == "Darwin":
                logger.info("Unmounting Firefox")
                # give the browser some time to fully exit before detaching
                time.sleep(10)
                os.system("hdiutil detach /Volumes/Nightly")
            elif platform.system() == "Linux":
                # XXX we should keep it for next time
                shutil.rmtree("firefox")
    333 
    334 
    335 def write_yml_file(yml_file, yml_data):
    336    logger.info("writing %s to %s" % (yml_data, yml_file))
    337    try:
    338        with open(yml_file, "w") as outfile:
    339            yaml.dump(yml_data, outfile, default_flow_style=False)
    340    except Exception:
    341        logger.error("failed to write yaml file", exc_info=True)
    342 
    343 
    344 def get_version(firefox):
    345    p = Popen([firefox, "--version"], stdin=PIPE, stdout=PIPE, stderr=PIPE)
    346    output, __ = p.communicate()
    347    first_line = output.strip().split(b"\n")[0]
    348    res = first_line.split()[-1]
    349    return res.decode("utf-8")
    350 
    351 
    352 def get_current_platform():
    353    """Returns a combination of system and arch info that matches TC standards.
    354 
    355    e.g. macosx64, win32, linux64, etc..
    356    """
    357    arch = sys.maxsize == 2**63 - 1 and "64" or "32"
    358    plat = platform.system().lower()
    359    if plat == "windows":
    360        plat = "win"
    361    elif plat == "darwin":
    362        plat = "macosx"
    363    return plat + arch
    364 
    365 
    366 class BaseEnv:
    367    def __init__(self, profile, firefox, geckodriver, archive, device_name):
    368        self.profile = profile
    369        self.firefox = firefox
    370        self.geckodriver = geckodriver
    371        if profile is None:
    372            self.profile = os.path.join(tempfile.mkdtemp(), "profile")
    373        else:
    374            self.profile = profile
    375        self.archive = archive
    376        self.device_name = device_name
    377 
    378    @property
    379    def target_platform(self):
    380        return self.get_target_platform()
    381 
    382    def get_target_platform(self):
    383        raise NotImplementedError()
    384 
    385    def get_device(self, *args, **kw):
    386        raise NotImplementedError()
    387 
    388    @contextlib.contextmanager
    389    def get_browser(self, path):
    390        raise NotImplementedError()
    391 
    392    def get_browser_args(self, headless):
    393        raise NotImplementedError()
    394 
    395    def prepare(self, logfile):
    396        pass
    397 
    398    def check_session(self, session):
    399        pass
    400 
    401    def dump_logs(self):
    402        pass
    403 
    404    def get_browser_version(self):
    405        raise NotImplementedError()
    406 
    407    def get_geckodriver(self, log_file):
    408        raise NotImplementedError()
    409 
    410    def collect_profile(self):
    411        pass
    412 
    413    def stop_browser(self):
    414        pass
    415 
    416 
# Taskcluster secrets endpoint template: {0} is the server, {1} the
# URL-encoded slash ("%2F") separating path segments, {2} the SCM level.
_URL = (
    "{0}/secrets/v1/secret/project"
    "{1}releng{1}gecko{1}build{1}level-{2}{1}conditioned-profiles"
)
# Fallback server when TASKCLUSTER_PROXY_URL is not in the environment.
_DEFAULT_SERVER = "https://firefox-ci-tc.services.mozilla.com"
    422 
    423 
    424 def get_tc_secret():
    425    if not TASK_CLUSTER:
    426        raise OSError("Not running in Taskcluster")
    427    session = requests.Session()
    428    retry = Retry(total=5, backoff_factor=0.1, status_forcelist=[500, 502, 503, 504])
    429    http_adapter = requests.adapters.HTTPAdapter(max_retries=retry)
    430    session.mount("https://", http_adapter)
    431    session.mount("http://", http_adapter)
    432    secrets_url = _URL.format(
    433        os.environ.get("TASKCLUSTER_PROXY_URL", _DEFAULT_SERVER),
    434        "%2F",
    435        os.environ.get("MOZ_SCM_LEVEL", "1"),
    436    )
    437    res = session.get(secrets_url, timeout=DOWNLOAD_TIMEOUT)
    438    res.raise_for_status()
    439    return res.json()["secret"]
    440 
    441 
    442 _CACHED = {}
    443 
    444 
    445 def obfuscate(text):
    446    if "CONDPROF_RUNNER" not in os.environ:
    447        return True, text
    448    username, password = get_credentials()
    449    if username is None:
    450        return False, text
    451    if username not in text and password not in text:
    452        return False, text
    453    text = text.replace(password, "<PASSWORD>")
    454    text = text.replace(username, "<USERNAME>")
    455    return True, text
    456 
    457 
    458 def obfuscate_file(path):
    459    if "CONDPROF_RUNNER" not in os.environ:
    460        return
    461    with open(path) as f:
    462        data = f.read()
    463    hit, data = obfuscate(data)
    464    if not hit:
    465        return
    466    with open(path, "w") as f:
    467        f.write(data)
    468 
    469 
    470 def get_credentials():
    471    if "creds" in _CACHED:
    472        return _CACHED["creds"]
    473    password = os.environ.get("FXA_PASSWORD")
    474    username = os.environ.get("FXA_USERNAME")
    475    if username is None or password is None:
    476        if not TASK_CLUSTER:
    477            return None, None
    478        secret = get_tc_secret()
    479        password = secret["password"]
    480        username = secret["username"]
    481    _CACHED["creds"] = username, password
    482    return username, password