json_gn_editor.py (24329B)
# Lint as: python3
# Copyright 2021 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
'''Helper script to use GN's JSON interface to make changes.'''

from __future__ import annotations

import contextlib
import copy
import dataclasses
import json
import logging
import os
import pathlib
import re
import shutil
import subprocess
import sys

from typing import Dict, Iterator, List, Optional, Tuple

_SRC_PATH = pathlib.Path(__file__).resolve().parents[2]

_BUILD_ANDROID_GYP_PATH = _SRC_PATH / 'build/android/gyp'
if str(_BUILD_ANDROID_GYP_PATH) not in sys.path:
    sys.path.append(str(_BUILD_ANDROID_GYP_PATH))

from util import build_utils

# Refer to parse_tree.cc for GN AST implementation details:
# https://gn.googlesource.com/gn/+/refs/heads/main/src/gn/parse_tree.cc
# These constants should match corresponding entries in parse_tree.cc.
# TODO: Add high-level details for the expected data structure.
NODE_CHILD = 'child'
NODE_TYPE = 'type'
NODE_VALUE = 'value'
BEFORE_COMMENT = 'before_comment'
SUFFIX_COMMENT = 'suffix_comment'
AFTER_COMMENT = 'after_comment'


@contextlib.contextmanager
def _backup_and_restore_file_contents(path: str):
    """Snapshots |path| on entry and restores its contents on exit.

    The restore happens even if the body raises, so an interrupted build
    never leaves an intermediate, untested file on disk.
    """
    with open(path) as f:
        contents = f.read()
    try:
        yield
    finally:
        # Ensure that the timestamp is updated since otherwise ninja will not
        # re-build relevant targets with the original file.
        with open(path, 'w') as f:
            f.write(contents)


def _build_targets_output(
        out_dir: str,
        targets: List[str],
        should_print: Optional[bool] = None) -> Optional[str]:
    """Builds |targets| in |out_dir| with autoninja.

    Returns the combined stdout/stderr of the build on success, or None if
    the build failed. When |should_print| is unset it defaults to whether
    DEBUG logging is enabled.
    """
    env = os.environ.copy()
    if should_print is None:
        should_print = logging.getLogger().isEnabledFor(logging.DEBUG)
    # Ensuring ninja does not attempt to summarize the build results in
    # slightly faster builds. This script does many builds so this time can
    # add up.
    if 'NINJA_SUMMARIZE_BUILD' in env:
        del env['NINJA_SUMMARIZE_BUILD']
    proc = subprocess.Popen(['autoninja', '-C', out_dir] + targets,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.STDOUT,
                            env=env,
                            text=True)
    lines = []
    prev_line = ''
    width = shutil.get_terminal_size().columns
    # Iterate the pipe until EOF rather than polling the process. Polling
    # (while proc.poll() is None) can exit the loop while output is still
    # buffered in the pipe, dropping the tail of the build output - which
    # callers inspect (e.g. for 'ninja: no work to do.').
    for line in proc.stdout:
        lines.append(line)
        if should_print:
            if prev_line.startswith('[') and line.startswith('['):
                # Shrink the line according to terminal size.
                msg = line.rstrip()
                if len(msg) > width:
                    # 5 = 3 (Ellipsis) + 2 (header)
                    length_to_show = width - 5
                    msg = f'{msg[:2]}...{msg[-length_to_show:]}'
                # \r to return the carriage to the beginning of line, \033[K to
                # replace the normal \n to erase until the end of the line.
                # This allows ninja output for successful targets to overwrite
                # each other.
                msg = f'\r{msg}\033[K'
            elif prev_line.startswith('['):
                # Since the previous line likely did not include a newline, an
                # extra newline is needed to avoid the current line being
                # appended to the previous line.
                msg = f'\n{line}'
            else:
                msg = line
            print(msg, end='')
        prev_line = line
    # The pipe is at EOF; wait for the process to finish and set returncode.
    proc.wait()
    if proc.returncode != 0:
        return None
    return ''.join(lines)


def _generate_project_json_content(out_dir: str) -> str:
    """Regenerates |out_dir|/project.json via `gn gen` and returns its text."""
    build_utils.CheckOutput(['gn', 'gen', '--ide=json', out_dir])
    with open(os.path.join(out_dir, 'project.json')) as f:
        return f.read()


@dataclasses.dataclass
class DepList:
    """Represents a dep list assignment in GN."""
    target_name: Optional[str]  # The name of the target containing the list.
    variable_name: str  # Left-hand side variable name the list is assigned to.
    child_nodes: List[dict]  # Right-hand side list of nodes.
    operation: str  # The assignment operation, whether += or =.
class BuildFile:
    """Represents the contents of a BUILD.gn file.

    On __enter__ the file is parsed into GN's JSON AST via
    `gn format --dump-tree=json`; mutations are applied to that AST and, on
    __exit__ (unless dryrun), written back via `gn format --read-tree=json`.
    """
    def __init__(self,
                 build_gn_path: str,
                 root_gn_path: pathlib.Path,
                 *,
                 dryrun: bool = False):
        self._root = root_gn_path
        # Path of this BUILD.gn relative to the source root.
        self._rel_path = os.path.relpath(build_gn_path, root_gn_path)
        # GN-style absolute directory label, e.g. //chrome/android.
        self._gn_rel_path = '//' + os.path.dirname(self._rel_path)
        self._full_path = os.path.abspath(build_gn_path)
        # When True, AST changes are never written back to disk on exit.
        self._skip_write_content = dryrun

    def __enter__(self):
        # Let GN itself parse the file; avoids re-implementing the grammar.
        output = build_utils.CheckOutput(
            ['gn', 'format', '--dump-tree=json', self._full_path])
        self._content = json.loads(output)
        # Serialized snapshot used to detect whether the AST was modified.
        self._original_content = json.dumps(self._content)
        return self

    def __exit__(self, exc, value, tb):
        if not self._skip_write_content:
            self.write_content_to_file()

    # See: https://gist.github.com/sgraham/bd9ffee312f307d5f417019a9c0f0777
    def _find_all(self, match_fn):
        """Returns (target_name, match) pairs for AST nodes where |match_fn|
        returns a non-None value.

        Depth-first walk over the whole AST; descent stops below a matching
        node. The target name is inherited from the nearest enclosing
        FUNCTION node (e.g. java_library("foo")), or None outside any target.
        """
        results = []

        def get_target_name(node) -> Optional[str]:
            """Example format (with irrelevant fields omitted):
            {
               "child": [ {
                  "child": [ {
                     "type": "LITERAL",
                     "value": "\"hello_world_java\""
                  } ],
                  "type": "LIST"
               }, {
                  ...
               } ],
               "type": "FUNCTION",
               "value": "java_library"
            }

            Example return: hello_world_java
            """
            if node.get(NODE_TYPE) != 'FUNCTION':
                return None
            children = node.get(NODE_CHILD)
            if not children:
                return None
            first_child = children[0]
            if first_child.get(NODE_TYPE) != 'LIST':
                return None
            grand_children = first_child.get(NODE_CHILD)
            if not grand_children:
                return None
            grand_child = grand_children[0]
            if grand_child.get(NODE_TYPE) != 'LITERAL':
                return None
            name = grand_child.get(NODE_VALUE)
            # LITERAL values keep their surrounding quotation marks.
            if name.startswith('"'):
                return name[1:-1]
            return name

        def recursive_find(root, last_known_target=None):
            target_name = get_target_name(root) or last_known_target
            matched = match_fn(root)
            if matched is not None:
                results.append((target_name, matched))
                return
            children = root.get(NODE_CHILD)
            if children:
                for child in children:
                    recursive_find(child, last_known_target=target_name)

        recursive_find(self._content)
        return results

    def _normalize(self,
                   name: Optional[str],
                   abs_path: bool = True,
                   allow_relative: bool = False):
        """Returns the absolute GN path to the target with |name|.

        This method normalizes target names, assuming that relative targets
        are referenced based on the current file, allowing targets to be
        compared by name to determine whether they are the same or not.

        If |abs_path| is true, the path is always converted to an absolute
        path before further processing. Otherwise no absolute path checks are
        performed.

        If |allow_relative| is true, the path returned may be relative to the
        current build file when possible, i.e. //base:java will be :java in
        base/BUILD.gn.

        Given the current file is chrome/android/BUILD.gn:

        # Removes surrounding quotation marks.
        "//chrome/android:chrome_java" -> //chrome/android:chrome_java

        # Makes relative paths absolute.
        :chrome_java -> //chrome/android:chrome_java

        # Spells out GN shorthands for basenames.
        //chrome/android -> //chrome/android:android
        """
        if not name:
            return ''
        if name.startswith('"'):
            name = name[1:-1]
        if not name.startswith('//') and abs_path:
            name = self._gn_rel_path + name
        if not ':' in name:
            name += ':' + os.path.basename(name)
        if allow_relative:
            base_path, target_name = name.split(':')
            if base_path == self._gn_rel_path:
                return ':' + target_name
        return name

    def _find_all_list_assignments(self):
        """Returns (target_name, (name, nodes, op)) for every list assignment."""
        def match_list_assignments(node):
            r"""Matches and returns the list being assigned.

            Binary node (with an operation such as = or +=)
                  /  \
                 /    \
              name    list of nodes

            Returns (name, list of nodes, op)
            """
            if node.get(NODE_TYPE) != 'BINARY':
                return None
            operation = node.get(NODE_VALUE)
            children = node.get(NODE_CHILD)
            assert len(children) == 2, (
                'Binary nodes should have two child nodes, but the node is: '
                f'{node}')
            left_child, right_child = children
            if left_child.get(NODE_TYPE) != 'IDENTIFIER':
                return None
            name = left_child.get(NODE_VALUE)
            if right_child.get(NODE_TYPE) != 'LIST':
                return None
            list_of_nodes = right_child.get(NODE_CHILD)
            return name, list_of_nodes, operation

        return self._find_all(match_list_assignments)

    def _find_all_deps_lists(self) -> Iterator[DepList]:
        """Yields a DepList for every deps-like list assignment in the file.

        Matches 'deps' plus any variable whose name starts with 'deps_', ends
        with '_deps', or contains '_deps_' (e.g. public_deps, jni_deps_list).
        """
        list_tuples = self._find_all_list_assignments()
        for target_name, (var_name, node_list, operation) in list_tuples:
            if (var_name == 'deps' or var_name.startswith('deps_')
                    or var_name.endswith('_deps') or '_deps_' in var_name):
                yield DepList(target_name=target_name,
                              variable_name=var_name,
                              child_nodes=node_list,
                              operation=operation)

    def _new_literal_node(self, value: str, begin_line: int = 1):
        """Returns a fresh LITERAL AST node holding the quoted |value|."""
        return {
            'location': {
                'begin_column': 1,
                'begin_line': begin_line,
                'end_column': 2,
                'end_line': begin_line,
            },
            'type': 'LITERAL',
            'value': f'"{value}"'
        }

    def _clone_replacing_value(self, node_to_copy: Dict, new_dep_name: str):
        """Clone the existing node to preserve line numbers and update name.

        It is easier to clone an existing node around the same location, as
        the actual dict looks like this:
        {
            'location': {
                'begin_column': 5,
                'begin_line': 137,
                'end_column': 27,
                'end_line': 137
            },
            'type': 'LITERAL',
            'value': '":anr_data_proto_java"'
        }

        Thus the new node to return should keep the same 'location' value
        (the parser is tolerant as long as it's roughly in the correct spot)
        but update the 'value' to the new dependency name.
        """
        new_dep = copy.deepcopy(node_to_copy)
        # Any comments associated with the previous dep would not apply.
        for comment_key in (BEFORE_COMMENT, AFTER_COMMENT, SUFFIX_COMMENT):
            new_dep.pop(comment_key, None)  # Remove if exists.
        new_dep[NODE_VALUE] = f'"{new_dep_name}"'
        return new_dep

    def add_deps(self, target: str, deps: List[str]) -> bool:
        """Appends |deps| to |target|'s 'deps' list.

        Deps already present (after normalization) are skipped. Returns
        whether at least one new dep was added.
        """
        added_new_dep = False
        normalized_target = self._normalize(target)
        for dep_list in self._find_all_deps_lists():
            if dep_list.target_name is None:
                continue
            # Only modify the first assignment operation to the deps variable,
            # otherwise if there are += operations, then the list of deps will
            # be added multiple times to the same target's deps.
            if dep_list.operation != '=':
                continue
            full_target_name = f'{self._gn_rel_path}:{dep_list.target_name}'
            # Support both the exact name and the absolute GN target names
            # starting with //.
            if (target != dep_list.target_name
                    and normalized_target != full_target_name):
                continue
            if dep_list.variable_name != 'deps':
                continue
            existing_dep_names = set(
                self._normalize(child.get(NODE_VALUE), abs_path=False)
                for child in dep_list.child_nodes)
            for new_dep_name in deps:
                new_dep_name = self._normalize(new_dep_name,
                                               allow_relative=True)
                if new_dep_name in existing_dep_names:
                    logging.info(
                        f'Skipping existing {new_dep_name} in {target}.deps')
                    continue
                logging.info(f'Adding {new_dep_name} to {target}.deps')
                # If there are no existing child nodes, then create a new one.
                # Otherwise clone an existing child node to ensure more
                # accurate line numbers and possible better preserve comments.
                if not dep_list.child_nodes:
                    new_dep = self._new_literal_node(new_dep_name)
                else:
                    new_dep = self._clone_replacing_value(
                        dep_list.child_nodes[0], new_dep_name)
                dep_list.child_nodes.append(new_dep)
                added_new_dep = True
        if not added_new_dep:
            # This should match the string in bytecode_processor.py.
            print(f'Unable to find {target}')
        return added_new_dep

    def search_deps(self, name_query: Optional[str],
                    path_query: Optional[str]) -> bool:
        """Prints and reports matches of the regex queries against this file.

        |path_query| filters on this file's relative path; |name_query| is
        matched against each (non-absolute-normalized) dep name. Returns
        whether a match was found/printed.
        """
        if path_query:
            if not re.search(path_query, self._rel_path):
                return False
            elif not name_query:
                # Path-only query: matching the file path is a hit by itself.
                print(self._rel_path)
                return True
        for dep_list in self._find_all_deps_lists():
            for child in dep_list.child_nodes:
                # Typically searches run on non-absolute dep paths.
                dep_name = self._normalize(child.get(NODE_VALUE),
                                           abs_path=False)
                if name_query and re.search(name_query, dep_name):
                    print(f'{self._rel_path}: {dep_name} in '
                          f'{dep_list.target_name}.{dep_list.variable_name}')
                    return True
        return False

    def split_deps(self, original_dep_name: str,
                   new_dep_names: List[str]) -> bool:
        """Adds each of |new_dep_names| next to |original_dep_name|.

        Returns whether any dep was added (see _split_dep for details).
        """
        split = False
        for new_dep_name in new_dep_names:
            if self._split_dep(original_dep_name, new_dep_name):
                split = True
        return split

    def _split_dep(self, original_dep_name: str, new_dep_name: str) -> bool:
        """Add |new_dep_name| to GN deps that contains |original_dep_name|.

        Supports deps, public_deps, and other deps variables.

        Works for explicitly assigning a list to deps:
        deps = [ ..., "original_dep", ...]
        # Becomes
        deps = [ ..., "original_dep", "new_dep", ...]
        Also works for appending a list to deps:
        public_deps += [ ..., "original_dep", ...]
        # Becomes
        public_deps += [ ..., "original_dep", "new_dep", ...]

        Does not work for assigning or appending variables to deps:
        deps = other_list_of_deps  # Does NOT check other_list_of_deps.
        # Becomes (no changes)
        deps = other_list_of_deps

        Does not work with parameter expansion, i.e. $variables.

        Returns whether the new dep was added one or more times.
        """
        for dep_name in (original_dep_name, new_dep_name):
            assert dep_name.startswith('//'), (
                f'Absolute GN path required, starting with //: {dep_name}')

        added_new_dep = False
        normalized_original_dep_name = self._normalize(original_dep_name)
        normalized_new_dep_name = self._normalize(new_dep_name)
        for dep_list in self._find_all_deps_lists():
            original_dep_idx = None
            new_dep_already_exists = False
            for idx, child in enumerate(dep_list.child_nodes):
                dep_name = self._normalize(child.get(NODE_VALUE))
                if dep_name == normalized_original_dep_name:
                    original_dep_idx = idx
                if dep_name == normalized_new_dep_name:
                    new_dep_already_exists = True
            if original_dep_idx is not None and not new_dep_already_exists:
                if dep_list.target_name is None:
                    target_str = self._gn_rel_path
                else:
                    target_str = f'{self._gn_rel_path}:{dep_list.target_name}'
                location = f"{target_str}'s {dep_list.variable_name} variable"
                logging.info(f'Adding {new_dep_name} to {location}')
                new_dep = self._clone_replacing_value(
                    dep_list.child_nodes[original_dep_idx], new_dep_name)
                # Add the new dep after the existing dep to preserve comments
                # before the existing dep.
                dep_list.child_nodes.insert(original_dep_idx + 1, new_dep)
                added_new_dep = True

        return added_new_dep

    def remove_deps(self,
                    dep_names: List[str],
                    out_dir: str,
                    targets: List[str],
                    target_name_filter: Optional[str],
                    inline_mode: bool = False) -> bool:
        """Removes each of |dep_names| wherever the build still succeeds.

        In |inline_mode| the first dep acts as a gate: if it cannot be
        removed from this file, the remaining deps are skipped entirely.
        Returns whether any dep was removed.
        """
        if not inline_mode:
            deps_to_remove = dep_names
        else:
            # If the first dep cannot be removed (or is not found) then in the
            # case of inlining we can skip this file for the rest of the deps.
            first_dep = dep_names[0]
            if not self._remove_deps([first_dep], out_dir, targets,
                                     target_name_filter):
                return False
            deps_to_remove = dep_names[1:]
        return self._remove_deps(deps_to_remove, out_dir, targets,
                                 target_name_filter)

    def _remove_deps(self, dep_names: List[str], out_dir: str,
                     targets: List[str],
                     target_name_filter: Optional[str]) -> bool:
        """Remove |dep_names| if the target can still be built in |out_dir|.

        Supports deps, public_deps, and other deps variables.

        Works for explicitly assigning a list to deps:
        deps = [ ..., "original_dep", ...]
        # Becomes
        deps = [ ..., ...]

        Does not work with parameter expansion, i.e. $variables.

        Returns whether any deps were removed.
        """
        normalized_dep_names = set()
        for dep_name in dep_names:
            assert dep_name.startswith('//'), (
                f'Absolute GN path required, starting with //: {dep_name}')
            normalized_dep_names.add(self._normalize(dep_name))

        removed_dep = False
        for dep_list in self._find_all_deps_lists():
            child_deps_to_remove = [
                c for c in dep_list.child_nodes
                if self._normalize(c.get(NODE_VALUE)) in normalized_dep_names
            ]
            if not child_deps_to_remove:
                continue

            if dep_list.target_name is None:
                target_name_str = self._gn_rel_path
            else:
                target_name_str = f'{self._gn_rel_path}:{dep_list.target_name}'
            if (target_name_filter is not None and
                    re.search(target_name_filter, target_name_str) is None):
                logging.info(f'Skip: Since re.search("{target_name_filter}", '
                             f'"{target_name_str}") is None.')
                continue

            location = f"{target_name_str}'s {dep_list.variable_name} variable"
            # Baseline project.json, used below to detect whether this dep
            # list is even part of out_dir's build.
            expected_json = _generate_project_json_content(out_dir)
            num_to_remove = len(child_deps_to_remove)
            for remove_idx, child_dep in enumerate(child_deps_to_remove):
                child_dep_name = self._normalize(child_dep.get(NODE_VALUE))
                idx_to_remove = dep_list.child_nodes.index(child_dep)
                logging.info(f'({remove_idx + 1}/{num_to_remove}) Found '
                             f'{child_dep_name} in {location}.')
                child_to_remove = dep_list.child_nodes[idx_to_remove]
                can_remove_dep = False
                with _backup_and_restore_file_contents(self._full_path):
                    dep_list.child_nodes.remove(child_to_remove)
                    self.write_content_to_file()
                    # Immediately restore dep_list's original value in case
                    # the following build is interrupted. We don't want the
                    # intermediate untested value to be written as the final
                    # build file.
                    dep_list.child_nodes.insert(idx_to_remove, child_to_remove)
                    if expected_json is not None:
                        # If no changes to project.json was detected, this
                        # means the current target is not part of out_dir's
                        # build and cannot be removed even if the build
                        # succeeds.
                        after_json = _generate_project_json_content(out_dir)
                        if expected_json == after_json:
                            # If one change in this list isn't part of the
                            # build, no need to try any other in this list.
                            logging.info('Skip: No changes to project.json.')
                            break

                        # Avoids testing every dep removal for the same list.
                        expected_json = None
                    if self._can_still_build_everything(out_dir, targets):
                        can_remove_dep = True
                if not can_remove_dep:
                    continue

                # The build still works without this dep: remove it for real.
                dep_list.child_nodes.remove(child_to_remove)
                # Comments before a target can apply to the targets after.
                if (BEFORE_COMMENT in child_to_remove
                        and idx_to_remove < len(dep_list.child_nodes)):
                    child_after = dep_list.child_nodes[idx_to_remove]
                    if BEFORE_COMMENT not in child_after:
                        child_after[BEFORE_COMMENT] = []
                    child_after[BEFORE_COMMENT][:] = (
                        child_to_remove[BEFORE_COMMENT] +
                        child_after[BEFORE_COMMENT])
                # Comments after or behind a target don't make sense to re-
                # position, simply ignore AFTER_COMMENT and SUFFIX_COMMENT.
                removed_dep = True
                logging.info(f'Removed {child_dep_name} from {location}.')
        return removed_dep

    def _can_still_build_everything(self, out_dir: str,
                                    targets: List[str]) -> bool:
        """Returns whether |targets| still build AND the change was exercised."""
        output = _build_targets_output(out_dir, targets)
        if output is None:
            logging.info('Ninja failed to build all targets')
            return False
        # If ninja did not re-build anything, then the target changed is not
        # among the targets being built. Avoid this change as it's not been
        # tested/used.
        if 'ninja: no work to do.' in output:
            logging.info('Ninja did not find any targets to build')
            return False
        return True

    def write_content_to_file(self) -> None:
        """Writes the (possibly modified) AST back to disk via gn format.

        No-op when the AST is unchanged from the snapshot taken in __enter__.
        """
        current_content = json.dumps(self._content)
        if current_content != self._original_content:
            subprocess.run(
                ['gn', 'format', '--read-tree=json', self._full_path],
                text=True,
                check=True,
                input=current_content)