tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

json_gn_editor.py (24329B)


      1 # Lint as: python3
      2 # Copyright 2021 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 '''Helper script to use GN's JSON interface to make changes.'''
      6 
      7 from __future__ import annotations
      8 
      9 import contextlib
     10 import copy
     11 import dataclasses
     12 import json
     13 import logging
     14 import os
     15 import pathlib
     16 import re
     17 import shutil
     18 import subprocess
     19 import sys
     20 
     21 from typing import Dict, Iterator, List, Optional, Tuple
     22 
     23 _SRC_PATH = pathlib.Path(__file__).resolve().parents[2]
     24 
     25 _BUILD_ANDROID_GYP_PATH = _SRC_PATH / 'build/android/gyp'
     26 if str(_BUILD_ANDROID_GYP_PATH) not in sys.path:
     27    sys.path.append(str(_BUILD_ANDROID_GYP_PATH))
     28 
     29 from util import build_utils
     30 
     31 # Refer to parse_tree.cc for GN AST implementation details:
     32 # https://gn.googlesource.com/gn/+/refs/heads/main/src/gn/parse_tree.cc
     33 # These constants should match corresponding entries in parse_tree.cc.
     34 # TODO: Add high-level details for the expected data structure.
     35 NODE_CHILD = 'child'
     36 NODE_TYPE = 'type'
     37 NODE_VALUE = 'value'
     38 BEFORE_COMMENT = 'before_comment'
     39 SUFFIX_COMMENT = 'suffix_comment'
     40 AFTER_COMMENT = 'after_comment'
     41 
     42 
     43 @contextlib.contextmanager
     44 def _backup_and_restore_file_contents(path: str):
     45    with open(path) as f:
     46        contents = f.read()
     47    try:
     48        yield
     49    finally:
     50        # Ensure that the timestamp is updated since otherwise ninja will not
     51        # re-build relevant targets with the original file.
     52        with open(path, 'w') as f:
     53            f.write(contents)
     54 
     55 
     56 def _build_targets_output(
     57        out_dir: str,
     58        targets: List[str],
     59        should_print: Optional[bool] = None) -> Optional[str]:
     60    env = os.environ.copy()
     61    if should_print is None:
     62        should_print = logging.getLogger().isEnabledFor(logging.DEBUG)
     63    # Ensuring ninja does not attempt to summarize the build results in slightly
     64    # faster builds. This script does many builds so this time can add up.
     65    if 'NINJA_SUMMARIZE_BUILD' in env:
     66        del env['NINJA_SUMMARIZE_BUILD']
     67    proc = subprocess.Popen(['autoninja', '-C', out_dir] + targets,
     68                            stdout=subprocess.PIPE,
     69                            stderr=subprocess.STDOUT,
     70                            env=env,
     71                            text=True)
     72    lines = []
     73    prev_line = ''
     74    width = shutil.get_terminal_size().columns
     75    while proc.poll() is None:
     76        line = proc.stdout.readline()
     77        lines.append(line)
     78        if should_print:
     79            if prev_line.startswith('[') and line.startswith('['):
     80                # Shrink the line according to terminal size.
     81                msg = line.rstrip()
     82                if len(msg) > width:
     83                    # 5 = 3 (Ellipsis) + 2 (header)
     84                    length_to_show = width - 5
     85                    msg = f'{msg[:2]}...{msg[-length_to_show:]}'
     86                # \r to return the carriage to the beginning of line, \033[K to
     87                # replace the normal \n to erase until the end of the line. This
     88                # allows ninja output for successful targets to overwrite each
     89                # other.
     90                msg = f'\r{msg}\033[K'
     91            elif prev_line.startswith('['):
     92                # Since the previous line likely did not include a newline, an
     93                # extra newline is needed to avoid the current line being
     94                # appended to the previous line.
     95                msg = f'\n{line}'
     96            else:
     97                msg = line
     98            print(msg, end='')
     99        prev_line = line
    100    if proc.returncode != 0:
    101        return None
    102    return ''.join(lines)
    103 
    104 
    105 def _generate_project_json_content(out_dir: str) -> str:
    106    build_utils.CheckOutput(['gn', 'gen', '--ide=json', out_dir])
    107    with open(os.path.join(out_dir, 'project.json')) as f:
    108        return f.read()
    109 
    110 
    111 @dataclasses.dataclass
    112 class DepList:
    113    """Represents a dep list assignment in GN."""
    114    target_name: Optional[str]  # The name of the target containing the list.
    115    variable_name: str  # Left-hand side variable name the list is assigned to.
    116    child_nodes: List[dict]  # Right-hand side list of nodes.
    117    operation: str  # The assignment operation, whether += or =.
    118 
    119 
    120 class BuildFile:
    121    """Represents the contents of a BUILD.gn file."""
    122    def __init__(self,
    123                 build_gn_path: str,
    124                 root_gn_path: pathlib.Path,
    125                 *,
    126                 dryrun: bool = False):
    127        self._root = root_gn_path
    128        self._rel_path = os.path.relpath(build_gn_path, root_gn_path)
    129        self._gn_rel_path = '//' + os.path.dirname(self._rel_path)
    130        self._full_path = os.path.abspath(build_gn_path)
    131        self._skip_write_content = dryrun
    132 
    def __enter__(self):
        """Loads the file's parse tree via `gn format --dump-tree=json`.

        The parsed tree is kept in self._content, and its JSON serialization
        is kept in self._original_content so later changes can be detected.
        """
        dump_cmd = ['gn', 'format', '--dump-tree=json', self._full_path]
        tree_json = build_utils.CheckOutput(dump_cmd)
        self._content = json.loads(tree_json)
        self._original_content = json.dumps(self._content)
        return self
    139 
    140    def __exit__(self, exc, value, tb):
    141        if not self._skip_write_content:
    142            self.write_content_to_file()
    143 
    144    # See: https://gist.github.com/sgraham/bd9ffee312f307d5f417019a9c0f0777
    145    def _find_all(self, match_fn):
    146        results = []
    147 
    148        def get_target_name(node) -> Optional[str]:
    149            """Example format (with irrelevant fields omitted):
    150            {
    151                "child": [ {
    152                    "child": [ {
    153                        "type": "LITERAL",
    154                        "value": "\"hello_world_java\""
    155                    } ],
    156                    "type": "LIST"
    157                }, {
    158                    ...
    159                } ],
    160                "type": "FUNCTION",
    161                "value": "java_library"
    162            }
    163 
    164            Example return: hello_world_java
    165            """
    166            if node.get(NODE_TYPE) != 'FUNCTION':
    167                return None
    168            children = node.get(NODE_CHILD)
    169            if not children:
    170                return None
    171            first_child = children[0]
    172            if first_child.get(NODE_TYPE) != 'LIST':
    173                return None
    174            grand_children = first_child.get(NODE_CHILD)
    175            if not grand_children:
    176                return None
    177            grand_child = grand_children[0]
    178            if grand_child.get(NODE_TYPE) != 'LITERAL':
    179                return None
    180            name = grand_child.get(NODE_VALUE)
    181            if name.startswith('"'):
    182                return name[1:-1]
    183            return name
    184 
    185        def recursive_find(root, last_known_target=None):
    186            target_name = get_target_name(root) or last_known_target
    187            matched = match_fn(root)
    188            if matched is not None:
    189                results.append((target_name, matched))
    190                return
    191            children = root.get(NODE_CHILD)
    192            if children:
    193                for child in children:
    194                    recursive_find(child, last_known_target=target_name)
    195 
    196        recursive_find(self._content)
    197        return results
    198 
    199    def _normalize(self,
    200                   name: Optional[str],
    201                   abs_path: bool = True,
    202                   allow_relative: bool = False):
    203        """Returns the absolute GN path to the target with |name|.
    204 
    205        This method normalizes target names, assuming that relative targets are
    206        referenced based on the current file, allowing targets to be compared
    207        by name to determine whether they are the same or not.
    208 
    209        If |abs_path| is true, the path is always converted to an absolute path
    210        before further processing. Otherwise no absolute path checks are
    211        performed.
    212 
    213        If |allow_relative| is true, the path returned may be relative to the
    214        current build file when possible, i.e. //base:java will be :java in
    215        base/BUILD.gn.
    216 
    217        Given the current file is chrome/android/BUILD.gn:
    218 
    219        # Removes surrounding quotation marks.
    220        "//chrome/android:chrome_java" -> //chrome/android:chrome_java
    221 
    222        # Makes relative paths absolute.
    223        :chrome_java -> //chrome/android:chrome_java
    224 
    225        # Spells out GN shorthands for basenames.
    226        //chrome/android -> //chrome/android:android
    227        """
    228        if not name:
    229            return ''
    230        if name.startswith('"'):
    231            name = name[1:-1]
    232        if not name.startswith('//') and abs_path:
    233            name = self._gn_rel_path + name
    234        if not ':' in name:
    235            name += ':' + os.path.basename(name)
    236        if allow_relative:
    237            base_path, target_name = name.split(':')
    238            if base_path == self._gn_rel_path:
    239                return ':' + target_name
    240        return name
    241 
    242    def _find_all_list_assignments(self):
    243        def match_list_assignments(node):
    244            r"""Matches and returns the list being assigned.
    245 
    246            Binary node (with an operation such as = or +=)
    247             /       \
    248            /         \
    249            name      list of nodes
    250 
    251            Returns (name, list of nodes, op)
    252            """
    253            if node.get(NODE_TYPE) != 'BINARY':
    254                return None
    255            operation = node.get(NODE_VALUE)
    256            children = node.get(NODE_CHILD)
    257            assert len(children) == 2, (
    258                'Binary nodes should have two child nodes, but the node is: '
    259                f'{node}')
    260            left_child, right_child = children
    261            if left_child.get(NODE_TYPE) != 'IDENTIFIER':
    262                return None
    263            name = left_child.get(NODE_VALUE)
    264            if right_child.get(NODE_TYPE) != 'LIST':
    265                return None
    266            list_of_nodes = right_child.get(NODE_CHILD)
    267            return name, list_of_nodes, operation
    268 
    269        return self._find_all(match_list_assignments)
    270 
    271    def _find_all_deps_lists(self) -> Iterator[DepList]:
    272        list_tuples = self._find_all_list_assignments()
    273        for target_name, (var_name, node_list, operation) in list_tuples:
    274            if (var_name == 'deps' or var_name.startswith('deps_')
    275                    or var_name.endswith('_deps') or '_deps_' in var_name):
    276                yield DepList(target_name=target_name,
    277                              variable_name=var_name,
    278                              child_nodes=node_list,
    279                              operation=operation)
    280 
    281    def _new_literal_node(self, value: str, begin_line: int = 1):
    282        return {
    283            'location': {
    284                'begin_column': 1,
    285                'begin_line': begin_line,
    286                'end_column': 2,
    287                'end_line': begin_line,
    288            },
    289            'type': 'LITERAL',
    290            'value': f'"{value}"'
    291        }
    292 
    293    def _clone_replacing_value(self, node_to_copy: Dict, new_dep_name: str):
    294        """Clone the existing node to preserve line numbers and update name.
    295 
    296        It is easier to clone an existing node around the same location, as the
    297        actual dict looks like this:
    298        {
    299            'location': {
    300                'begin_column': 5,
    301                'begin_line': 137,
    302                'end_column': 27,
    303                'end_line': 137
    304            },
    305            'type': 'LITERAL',
    306            'value': '":anr_data_proto_java"'
    307        }
    308 
    309        Thus the new node to return should keep the same 'location' value (the
    310        parser is tolerant as long as it's roughly in the correct spot) but
    311        update the 'value' to the new dependency name.
    312        """
    313        new_dep = copy.deepcopy(node_to_copy)
    314        # Any comments associated with the previous dep would not apply.
    315        for comment_key in (BEFORE_COMMENT, AFTER_COMMENT, SUFFIX_COMMENT):
    316            new_dep.pop(comment_key, None)  # Remove if exists.
    317        new_dep[NODE_VALUE] = f'"{new_dep_name}"'
    318        return new_dep
    319 
    320    def add_deps(self, target: str, deps: List[str]) -> bool:
    321        added_new_dep = False
    322        normalized_target = self._normalize(target)
    323        for dep_list in self._find_all_deps_lists():
    324            if dep_list.target_name is None:
    325                continue
    326            # Only modify the first assignment operation to the deps variable,
    327            # otherwise if there are += operations, then the list of deps will
    328            # be added multiple times to the same target's deps.
    329            if dep_list.operation != '=':
    330                continue
    331            full_target_name = f'{self._gn_rel_path}:{dep_list.target_name}'
    332            # Support both the exact name and the absolute GN target names
    333            # starting with //.
    334            if (target != dep_list.target_name
    335                    and normalized_target != full_target_name):
    336                continue
    337            if dep_list.variable_name != 'deps':
    338                continue
    339            existing_dep_names = set(
    340                self._normalize(child.get(NODE_VALUE), abs_path=False)
    341                for child in dep_list.child_nodes)
    342            for new_dep_name in deps:
    343                new_dep_name = self._normalize(new_dep_name,
    344                                               allow_relative=True)
    345                if new_dep_name in existing_dep_names:
    346                    logging.info(
    347                        f'Skipping existing {new_dep_name} in {target}.deps')
    348                    continue
    349                logging.info(f'Adding {new_dep_name} to {target}.deps')
    350                # If there are no existing child nodes, then create a new one.
    351                # Otherwise clone an existing child node to ensure more accurate
    352                # line numbers and possible better preserve comments.
    353                if not dep_list.child_nodes:
    354                    new_dep = self._new_literal_node(new_dep_name)
    355                else:
    356                    new_dep = self._clone_replacing_value(
    357                        dep_list.child_nodes[0], new_dep_name)
    358                dep_list.child_nodes.append(new_dep)
    359                added_new_dep = True
    360        if not added_new_dep:
    361            # This should match the string in bytecode_processor.py.
    362            print(f'Unable to find {target}')
    363        return added_new_dep
    364 
    365    def search_deps(self, name_query: Optional[str],
    366                    path_query: Optional[str]) -> bool:
    367        if path_query:
    368            if not re.search(path_query, self._rel_path):
    369                return False
    370            elif not name_query:
    371                print(self._rel_path)
    372                return True
    373        for dep_list in self._find_all_deps_lists():
    374            for child in dep_list.child_nodes:
    375                # Typically searches run on non-absolute dep paths.
    376                dep_name = self._normalize(child.get(NODE_VALUE),
    377                                           abs_path=False)
    378                if name_query and re.search(name_query, dep_name):
    379                    print(f'{self._rel_path}: {dep_name} in '
    380                          f'{dep_list.target_name}.{dep_list.variable_name}')
    381                    return True
    382        return False
    383 
    384    def split_deps(self, original_dep_name: str,
    385                   new_dep_names: List[str]) -> bool:
    386        split = False
    387        for new_dep_name in new_dep_names:
    388            if self._split_dep(original_dep_name, new_dep_name):
    389                split = True
    390        return split
    391 
    392    def _split_dep(self, original_dep_name: str, new_dep_name: str) -> bool:
    393        """Add |new_dep_name| to GN deps that contains |original_dep_name|.
    394 
    395        Supports deps, public_deps, and other deps variables.
    396 
    397        Works for explicitly assigning a list to deps:
    398        deps = [ ..., "original_dep", ...]
    399        # Becomes
    400        deps = [ ..., "original_dep", "new_dep", ...]
    401        Also works for appending a list to deps:
    402        public_deps += [ ..., "original_dep", ...]
    403        # Becomes
    404        public_deps += [ ..., "original_dep", "new_dep", ...]
    405 
    406        Does not work for assigning or appending variables to deps:
    407        deps = other_list_of_deps # Does NOT check other_list_of_deps.
    408        # Becomes (no changes)
    409        deps = other_list_of_deps
    410 
    411        Does not work with parameter expansion, i.e. $variables.
    412 
    413        Returns whether the new dep was added one or more times.
    414        """
    415        for dep_name in (original_dep_name, new_dep_name):
    416            assert dep_name.startswith('//'), (
    417                f'Absolute GN path required, starting with //: {dep_name}')
    418 
    419        added_new_dep = False
    420        normalized_original_dep_name = self._normalize(original_dep_name)
    421        normalized_new_dep_name = self._normalize(new_dep_name)
    422        for dep_list in self._find_all_deps_lists():
    423            original_dep_idx = None
    424            new_dep_already_exists = False
    425            for idx, child in enumerate(dep_list.child_nodes):
    426                dep_name = self._normalize(child.get(NODE_VALUE))
    427                if dep_name == normalized_original_dep_name:
    428                    original_dep_idx = idx
    429                if dep_name == normalized_new_dep_name:
    430                    new_dep_already_exists = True
    431            if original_dep_idx is not None and not new_dep_already_exists:
    432                if dep_list.target_name is None:
    433                    target_str = self._gn_rel_path
    434                else:
    435                    target_str = f'{self._gn_rel_path}:{dep_list.target_name}'
    436                location = f"{target_str}'s {dep_list.variable_name} variable"
    437                logging.info(f'Adding {new_dep_name} to {location}')
    438                new_dep = self._clone_replacing_value(
    439                    dep_list.child_nodes[original_dep_idx], new_dep_name)
    440                # Add the new dep after the existing dep to preserve comments
    441                # before the existing dep.
    442                dep_list.child_nodes.insert(original_dep_idx + 1, new_dep)
    443                added_new_dep = True
    444 
    445        return added_new_dep
    446 
    447    def remove_deps(self,
    448                    dep_names: List[str],
    449                    out_dir: str,
    450                    targets: List[str],
    451                    target_name_filter: Optional[str],
    452                    inline_mode: bool = False) -> Tuple[bool, str]:
    453        if not inline_mode:
    454            deps_to_remove = dep_names
    455        else:
    456            # If the first dep cannot be removed (or is not found) then in the
    457            # case of inlining we can skip this file for the rest of the deps.
    458            first_dep = dep_names[0]
    459            if not self._remove_deps([first_dep], out_dir, targets,
    460                                     target_name_filter):
    461                return False
    462            deps_to_remove = dep_names[1:]
    463        return self._remove_deps(deps_to_remove, out_dir, targets,
    464                                 target_name_filter)
    465 
    def _remove_deps(self, dep_names: List[str], out_dir: str,
                     targets: List[str],
                     target_name_filter: Optional[str]) -> bool:
        """Remove |dep_names| if the target can still be built in |out_dir|.

        Supports deps, public_deps, and other deps variables.

        Works for explicitly assigning a list to deps:
        deps = [ ..., "original_dep", ...]
        # Becomes
        deps = [ ..., ...]

        Does not work with parameter expansion, i.e. $variables.

        Each candidate removal is first written to the build file on its own
        and verified by building |targets|; the file's previous contents are
        put back afterwards. Only removals that passed verification are then
        applied to the in-memory tree.

        Args:
          dep_names: Absolute GN names (starting with //) of deps to remove.
          out_dir: Build directory used for `gn gen` and the test build.
          targets: Targets passed to the test build.
          target_name_filter: Optional regex; lists whose target string
            (//dir or //dir:target) does not match it are skipped.

        Returns whether any deps were removed.
        """
        normalized_dep_names = set()
        for dep_name in dep_names:
            assert dep_name.startswith('//'), (
                f'Absolute GN path required, starting with //: {dep_name}')
            normalized_dep_names.add(self._normalize(dep_name))

        removed_dep = False
        for dep_list in self._find_all_deps_lists():
            # Nodes of this list that name one of the deps to remove.
            child_deps_to_remove = [
                c for c in dep_list.child_nodes
                if self._normalize(c.get(NODE_VALUE)) in normalized_dep_names
            ]
            if not child_deps_to_remove:
                continue

            if dep_list.target_name is None:
                target_name_str = self._gn_rel_path
            else:
                target_name_str = f'{self._gn_rel_path}:{dep_list.target_name}'
            if (target_name_filter is not None and
                    re.search(target_name_filter, target_name_str) is None):
                logging.info(f'Skip: Since re.search("{target_name_filter}", '
                             f'"{target_name_str}") is None.')
                continue

            location = f"{target_name_str}'s {dep_list.variable_name} variable"
            # Baseline project.json, generated before anything is modified.
            expected_json = _generate_project_json_content(out_dir)
            num_to_remove = len(child_deps_to_remove)
            for remove_idx, child_dep in enumerate(child_deps_to_remove):
                child_dep_name = self._normalize(child_dep.get(NODE_VALUE))
                idx_to_remove = dep_list.child_nodes.index(child_dep)
                logging.info(f'({remove_idx + 1}/{num_to_remove}) Found '
                             f'{child_dep_name} in {location}.')
                child_to_remove = dep_list.child_nodes[idx_to_remove]
                can_remove_dep = False
                # The file on disk is restored to its previous contents when
                # this block is left, including via the break below.
                with _backup_and_restore_file_contents(self._full_path):
                    dep_list.child_nodes.remove(child_to_remove)
                    self.write_content_to_file()
                    # Immediately restore deps_list's original value in case the
                    # following build is interrupted. We don't want the
                    # intermediate untested value to be written as the final
                    # build file.
                    dep_list.child_nodes.insert(idx_to_remove, child_to_remove)
                    if expected_json is not None:
                        # If no changes to project.json was detected, this means
                        # the current target is not part of out_dir's build and
                        # cannot be removed even if the build succeeds.
                        after_json = _generate_project_json_content(out_dir)
                        if expected_json == after_json:
                            # If one change in this list isn't part of the
                            # build, no need to try any other in this list.
                            logging.info('Skip: No changes to project.json.')
                            break

                        # Avoids testing every dep removal for the same list.
                        expected_json = None
                    if self._can_still_build_everything(out_dir, targets):
                        can_remove_dep = True
                if not can_remove_dep:
                    continue

                # The removal passed verification: apply it to the in-memory
                # tree for good.
                dep_list.child_nodes.remove(child_to_remove)
                # Comments before a target can apply to the targets after.
                if (BEFORE_COMMENT in child_to_remove
                        and idx_to_remove < len(dep_list.child_nodes)):
                    # After the removal, idx_to_remove refers to the node that
                    # used to follow the removed one.
                    child_after = dep_list.child_nodes[idx_to_remove]
                    if BEFORE_COMMENT not in child_after:
                        child_after[BEFORE_COMMENT] = []
                    # Removed node's comments go first, then the follower's own.
                    child_after[BEFORE_COMMENT][:] = (
                        child_to_remove[BEFORE_COMMENT] +
                        child_after[BEFORE_COMMENT])
                # Comments after or behind a target don't make sense to re-
                # position, simply ignore AFTER_COMMENT and SUFFIX_COMMENT.
                removed_dep = True
                logging.info(f'Removed {child_dep_name} from {location}.')
        return removed_dep
    558 
    def _can_still_build_everything(self, out_dir: str,
                                    targets: List[str]) -> bool:
        """Builds |targets| in |out_dir| and reports whether that verified them.

        Returns False when the build fails, and also when ninja reports that
        there was no work to do; True otherwise.
        """
        build_output = _build_targets_output(out_dir, targets)
        if build_output is None:
            logging.info('Ninja failed to build all targets')
        elif 'ninja: no work to do.' in build_output:
            # If ninja did not re-build anything, then the target changed is
            # not among the targets being built. Avoid this change as it's not
            # been tested/used.
            logging.info('Ninja did not find any targets to build')
        else:
            return True
        return False
    572 
    573    def write_content_to_file(self) -> None:
    574        current_content = json.dumps(self._content)
    575        if current_content != self._original_content:
    576            subprocess.run(
    577                ['gn', 'format', '--read-tree=json', self._full_path],
    578                text=True,
    579                check=True,
    580                input=current_content)