tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

import-irregexp.py (5728B)


      1 #!/usr/bin/env python3
      2 
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this file,
      5 # You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      7 # This script handles all the mechanical steps of importing irregexp from v8:
      8 #
      9 # 1. Acquire the source: either from github, or optionally from a local copy of v8.
     10 # 2. Copy the contents of v8/src/regexp into js/src/irregexp/imported
     11 #    - Exclude files that we have chosen not to import.
     12 # 3. While doing so, update #includes:
     13 #    - Change "src/regexp/*" to "irregexp/imported/*".
     14 #    - Remove other v8-specific headers completely.
     15 # 4. Add '#include "irregexp/RegExpShim.h"' in the necessary places.
     16 # 5. Update the IRREGEXP_VERSION file to include the correct git hash.
     17 #
     18 # Usage:
     19 #  cd path/to/js/src/irregexp
     20 #  ./import-irregexp.py --path path/to/v8/src/regexp
     21 #
     22 # Alternatively, without the --path argument, import-irregexp.py will
     23 # clone v8 from github into a temporary directory.
     24 #
     25 # After running this script, changes to the shim code may be necessary
     26 # to account for changes in upstream irregexp.
     27 
     28 import os
     29 import re
     30 import subprocess
     31 import sys
     32 from pathlib import Path
     33 
     34 
     35 def copy_and_update_includes(src_path, dst_path):
     36    # List of header files that need to include the shim header
     37    need_shim = [
     38        "property-sequences.h",
     39        "regexp-ast.h",
     40        "regexp-bytecode-peephole.h",
     41        "regexp-bytecodes.h",
     42        "regexp-dotprinter.h",
     43        "regexp-error.h",
     44        "regexp.h",
     45        "regexp-macro-assembler.h",
     46        "regexp-parser.h",
     47        "regexp-stack.h",
     48        "special-case.h",
     49    ]
     50 
     51    src = open(str(src_path))
     52    dst = open(str(dst_path), "w")
     53 
     54    # 1. Rewrite includes of V8 regexp headers:
     55    #    Note that we exclude several headers and provide our own definitions.
     56    excluded = "|".join(["flags", "utils", "result-vector"])
     57    regexp_include = re.compile(f'#include "src/regexp(?!/regexp-({excluded}).h)')
     58    regexp_include_new = '#include "irregexp/imported'
     59 
     60    # 2. Remove includes of other V8 headers
     61    other_include = re.compile('#include "src/')
     62 
     63    # 3. If needed, add '#include "irregexp/RegExpShim.h"'.
     64    #    Note: We get a little fancy to ensure that header files are
     65    #    in alphabetic order. `need_to_add_shim` is true if we still
     66    #    have to add the shim header in this file. `adding_shim_now`
     67    #    is true if we have found a '#include "src/*' and we are just
     68    #    waiting to find an empty line so that we can insert the shim
     69    #    header in the right place.
     70    need_to_add_shim = src_path.name in need_shim
     71    adding_shim_now = False
     72 
     73    for line in src:
     74        if adding_shim_now:
     75            if line == "\n":
     76                dst.write('#include "irregexp/RegExpShim.h"\n')
     77                need_to_add_shim = False
     78                adding_shim_now = False
     79 
     80        if regexp_include.search(line):
     81            dst.write(re.sub(regexp_include, regexp_include_new, line))
     82        elif other_include.search(line):
     83            if need_to_add_shim:
     84                adding_shim_now = True
     85        else:
     86            dst.write(line)
     87 
     88 
     89 def import_from(srcdir, dstdir):
     90    excluded = [
     91        "DIR_METADATA",
     92        "OWNERS",
     93        "regexp.cc",
     94        "regexp-flags.h",
     95        "regexp-result-vector.cc",
     96        "regexp-result-vector.h",
     97        "regexp-utils.cc",
     98        "regexp-utils.h",
     99        "regexp-macro-assembler-arch.h",
    100    ]
    101 
    102    for file in srcdir.iterdir():
    103        if file.is_dir():
    104            continue
    105        if str(file.name) in excluded:
    106            continue
    107        copy_and_update_includes(file, dstdir / "imported" / file.name)
    108 
    109 
    110 if __name__ == "__main__":
    111    import argparse
    112    import tempfile
    113 
    114    # This script should be run from js/src/irregexp to work correctly.
    115    current_path = Path(os.getcwd())
    116    expected_path = "js/src/irregexp"
    117    if not current_path.match(expected_path):
    118        raise RuntimeError("%s must be run from %s" % (sys.argv[0], expected_path))
    119 
    120    parser = argparse.ArgumentParser(description="Import irregexp from v8")
    121    parser.add_argument("-p", "--path", help="path to v8/src/regexp", required=False)
    122    args = parser.parse_args()
    123 
    124    if args.path:
    125        src_path = Path(args.path)
    126        provided_path = "the command-line"
    127    elif "TASK_ID" in os.environ:
    128        src_path = Path("/builds/worker/v8/")
    129        subprocess.run("git pull origin master", check=True, shell=True, cwd=src_path)
    130 
    131        src_path = Path("/builds/worker/v8/src/regexp")
    132        provided_path = "the hardcoded path in the taskcluster image"
    133    elif "V8_GIT" in os.environ:
    134        src_path = Path(os.environ["V8_GIT"])
    135        provided_path = "the V8_GIT environment variable"
    136    else:
    137        tempdir = tempfile.TemporaryDirectory()
    138        v8_git = "https://github.com/v8/v8.git"
    139        clone = "git clone --depth 1 %s %s" % (v8_git, tempdir.name)
    140        os.system(clone)
    141        src_path = Path(tempdir.name) / "src/regexp"
    142        provided_path = "the temporary git checkout"
    143 
    144    if not (src_path / "regexp.h").exists():
    145        print("Could not find regexp.h in the path provided from", provided_path)
    146        print("Usage:\n  import-irregexp.py [--path <path/to/v8/src/regexp>]")
    147        sys.exit(1)
    148 
    149    if "MACH_VENDOR" not in os.environ:
    150        print(
    151            "Running this script outside ./mach vendor is not recommended - ",
    152            "You will need to update moz.yaml manually",
    153        )
    154        print("We recommend instead `./mach vendor js/src/irregexp/moz.yaml`")
    155        response = input("Type Y to continue... ")
    156        if response.lower() != "y":
    157            sys.exit(1)
    158 
    159    import_from(src_path, current_path)