import-irregexp.py (5728B)
#!/usr/bin/env python3

# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
# You can obtain one at http://mozilla.org/MPL/2.0/.

# This script handles all the mechanical steps of importing irregexp from v8:
#
# 1. Acquire the source: either from github, or optionally from a local copy of v8.
# 2. Copy the contents of v8/src/regexp into js/src/irregexp/imported
#    - Exclude files that we have chosen not to import.
# 3. While doing so, update #includes:
#    - Change "src/regexp/*" to "irregexp/imported/*".
#    - Remove other v8-specific headers completely.
# 4. Add '#include "irregexp/RegExpShim.h"' in the necessary places.
# 5. Update the IRREGEXP_VERSION file to include the correct git hash.
#
# Usage:
#  cd path/to/js/src/irregexp
#  ./import-irregexp.py --path path/to/v8/src/regexp
#
# Alternatively, without the --path argument, import-irregexp.py will
# clone v8 from github into a temporary directory.
#
# After running this script, changes to the shim code may be necessary
# to account for changes in upstream irregexp.

import argparse
import os
import re
import subprocess
import sys
import tempfile
from pathlib import Path

# Header files that need to include the shim header.
_NEED_SHIM = frozenset(
    {
        "property-sequences.h",
        "regexp-ast.h",
        "regexp-bytecode-peephole.h",
        "regexp-bytecodes.h",
        "regexp-dotprinter.h",
        "regexp-error.h",
        "regexp.h",
        "regexp-macro-assembler.h",
        "regexp-parser.h",
        "regexp-stack.h",
        "special-case.h",
    }
)

# Files in the source directory that are never copied.
_EXCLUDED_FILES = frozenset(
    {
        "DIR_METADATA",
        "OWNERS",
        "regexp.cc",
        "regexp-flags.h",
        "regexp-result-vector.cc",
        "regexp-result-vector.h",
        "regexp-utils.cc",
        "regexp-utils.h",
        "regexp-macro-assembler-arch.h",
    }
)

# 1. Rewrite includes of V8 regexp headers:
#    Note that we exclude several headers and provide our own definitions.
#    The negative lookahead keeps those headers from matching here, so they
#    fall through to _OTHER_INCLUDE and are dropped like any other V8 header.
_EXCLUDED_HEADERS = "|".join(["flags", "utils", "result-vector"])
_REGEXP_INCLUDE = re.compile(
    rf'#include "src/regexp(?!/regexp-({_EXCLUDED_HEADERS})\.h)'
)
_REGEXP_INCLUDE_NEW = '#include "irregexp/imported'

# 2. Remove includes of other V8 headers
_OTHER_INCLUDE = re.compile(r'#include "src/')

_SHIM_INCLUDE = '#include "irregexp/RegExpShim.h"\n'


def copy_and_update_includes(src_path, dst_path):
    """Copy src_path to dst_path line by line, rewriting V8 #include lines.

    - Lines matching '#include "src/regexp...' (other than the excluded
      regexp-flags/utils/result-vector headers) are rewritten to point at
      "irregexp/imported".
    - Any other line containing '#include "src/' is dropped.
    - If src_path.name is one of the headers that need the shim, an
      '#include "irregexp/RegExpShim.h"' line is inserted before the first
      empty line that follows a dropped include.

    Args:
        src_path: pathlib.Path of the file to read (its .name is consulted).
        dst_path: path of the file to write; it is overwritten.
    """
    # 3. If needed, add '#include "irregexp/RegExpShim.h"'.
    #    Note: We get a little fancy to ensure that header files are
    #    in alphabetic order. `need_to_add_shim` is true if we still
    #    have to add the shim header in this file. `adding_shim_now`
    #    is true if we have found a '#include "src/*' and we are just
    #    waiting to find an empty line so that we can insert the shim
    #    header in the right place.
    need_to_add_shim = src_path.name in _NEED_SHIM
    adding_shim_now = False

    # Context managers guarantee both files are flushed and closed, and the
    # pinned encoding keeps the output independent of the user's locale.
    with open(src_path, encoding="utf-8") as src:
        with open(dst_path, "w", encoding="utf-8") as dst:
            for line in src:
                if adding_shim_now and line == "\n":
                    dst.write(_SHIM_INCLUDE)
                    need_to_add_shim = False
                    adding_shim_now = False

                if _REGEXP_INCLUDE.search(line):
                    dst.write(_REGEXP_INCLUDE.sub(_REGEXP_INCLUDE_NEW, line))
                elif _OTHER_INCLUDE.search(line):
                    # Dropped; it also marks where the shim include belongs.
                    if need_to_add_shim:
                        adding_shim_now = True
                else:
                    dst.write(line)

    if need_to_add_shim:
        # Either no '#include "src/...' line was dropped, or no empty line
        # followed it, so the insertion point was never reached.
        print(
            f"Warning: could not add the RegExpShim.h include to {dst_path}",
            file=sys.stderr,
        )


def import_from(srcdir, dstdir):
    """Copy every non-excluded regular file of srcdir into dstdir/"imported".

    Sub-directories of srcdir are skipped, as are the files named in
    _EXCLUDED_FILES. Each remaining file goes through
    copy_and_update_includes(). The "imported" directory is not created
    here.

    Args:
        srcdir: pathlib.Path of the directory to read from.
        dstdir: pathlib.Path whose "imported" child receives the copies.
    """
    for file in srcdir.iterdir():
        if file.is_dir() or file.name in _EXCLUDED_FILES:
            continue
        copy_and_update_includes(file, dstdir / "imported" / file.name)


def main():
    """Locate the V8 regexp sources and import them into the current directory.

    The source directory is chosen from, in order: the --path argument, the
    hardcoded taskcluster checkout (when TASK_ID is set), the V8_GIT
    environment variable, or a fresh shallow clone from github.
    """
    # This script should be run from js/src/irregexp to work correctly.
    current_path = Path(os.getcwd())
    expected_path = "js/src/irregexp"
    if not current_path.match(expected_path):
        raise RuntimeError(f"{sys.argv[0]} must be run from {expected_path}")

    parser = argparse.ArgumentParser(description="Import irregexp from v8")
    parser.add_argument("-p", "--path", help="path to v8/src/regexp", required=False)
    args = parser.parse_args()

    # Holds the TemporaryDirectory (if any) so that the checkout is not
    # cleaned up before import_from() has finished reading it.
    tempdir = None

    if args.path:
        src_path = Path(args.path)
        provided_path = "the command-line"
    elif "TASK_ID" in os.environ:
        v8_checkout = Path("/builds/worker/v8/")
        subprocess.run(
            ["git", "pull", "origin", "master"], check=True, cwd=v8_checkout
        )

        src_path = Path("/builds/worker/v8/src/regexp")
        provided_path = "the hardcoded path in the taskcluster image"
    elif "V8_GIT" in os.environ:
        src_path = Path(os.environ["V8_GIT"])
        provided_path = "the V8_GIT environment variable"
    else:
        tempdir = tempfile.TemporaryDirectory()
        v8_git = "https://github.com/v8/v8.git"
        # An argv list avoids the shell, and the exit status is checked so a
        # failed clone is reported as such.
        clone = subprocess.run(
            ["git", "clone", "--depth", "1", v8_git, tempdir.name], check=False
        )
        if clone.returncode != 0:
            print(f"git clone of {v8_git} failed with exit code {clone.returncode}")
            sys.exit(1)
        src_path = Path(tempdir.name) / "src/regexp"
        provided_path = "the temporary git checkout"

    if not (src_path / "regexp.h").exists():
        print("Could not find regexp.h in the path provided from", provided_path)
        print("Usage:\n  import-irregexp.py [--path <path/to/v8/src/regexp>]")
        sys.exit(1)

    if "MACH_VENDOR" not in os.environ:
        print(
            "Running this script outside ./mach vendor is not recommended - ",
            "You will need to update moz.yaml manually",
        )
        print("We recommend instead `./mach vendor js/src/irregexp/moz.yaml`")
        response = input("Type Y to continue... ")
        if response.lower() != "y":
            sys.exit(1)

    import_from(src_path, current_path)


if __name__ == "__main__":
    main()