tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

update_bundle_filelist.py (11426B)


      1 #!/usr/bin/env python3
      2 # Copyright 2023 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 """
      6 Updates .filelist files using data from corresponding .globlist files (or
      7 checks whether they are up to date).
      8 
      9 bundle_data targets require an explicit source list, but maintaining these large
     10 lists can be cumbersome. This script aims to simplify the process of updating
     11 these lists by either expanding globs to update file lists or check that an
     12 existing file list matches such an expansion (i.e., checking during presubmit).
     13 
     14 The .globlist file contains a list of globs that will be expanded to either
     15 compare or replace a corresponding .filelist. It is possible to exclude items
     16 from the file list with globs as well. These lines are prefixed with '-' and are
     17 processed in order, so be sure that exclusions succeed inclusions in the list of
globs. Comments and empty lines are permitted in .globlist files; comments are
prefixed with '#'.
     20 
     21 By convention, the base name of the .globlist and .filelist files matches the
     22 label of their corresponding bundle_data from the .gn file. In order to ensure
that these filelists don't get stale, there should also be a PRESUBMIT.py
which uses this script to check that the list is up to date.
     25 
     26 By default, the script will update the file list to match the expanded globs.
     27 """
     28 
     29 import argparse
     30 import datetime
     31 import difflib
     32 import glob
     33 import os.path
     34 import re
     35 import subprocess
     36 import sys
     37 
# Character to set colors in terminal. Taken, along with the printing routine
# below, from update_deps.py.
TERMINAL_ERROR_COLOR = '\033[91m'
TERMINAL_RESET_COLOR = '\033[0m'

# Header written at the top of a .filelist when the header it already has is
# not recognized by _HEADER_PATTERN. The copyright year is filled in with the
# current year when this module is loaded.
_HEADER = """# Copyright %d The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#       If it requires updating, you should get a presubmit error with
#       instructions on how to regenerate. Otherwise, do not edit.
""" % (datetime.datetime.now().year)

# Recognizes the generated header with any copyright year, so an existing
# header is accepted even if it was written in a different year.
_HEADER_PATTERN = re.compile(r"""# Copyright [0-9]+ The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
# NOTE: this file is generated by build/ios/update_bundle_filelist.py
#       If it requires updating, you should get a presubmit error with
#       instructions on how to regenerate. Otherwise, do not edit.
""")

# Number of leading lines of a .filelist that are treated as its header; this
# is the number of lines in _HEADER.
_HEADER_HEIGHT = 6

# Marker lines in a .globlist. Between the "push" and "pop" markers, glob
# expansions that land outside the .globlist's directory are not rejected.
_START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# push(ignore-relative)'
_STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR = '# pop(ignore-relative)'
     63 
     64 
     65 def parse_filelist(filelist_name):
     66  try:
     67    with open(filelist_name) as filelist:
     68      unfiltered = [l for l in filelist]
     69      header = ''.join(unfiltered[:_HEADER_HEIGHT])
     70      files = sorted(l.strip() for l in unfiltered[_HEADER_HEIGHT:])
     71      return (files, header)
     72  except Exception as e:
     73    print_error(f'Could not read file list: {filelist_name}', f'{type(e)}: {e}')
     74    return []
     75 
     76 
     77 def get_git_command_name():
     78  if sys.platform.startswith('win'):
     79    return 'git.bat'
     80  return 'git'
     81 
     82 
     83 def get_tracked_files(directory, globroot, repository_root_relative, verbose):
     84  try:
     85    if os.getcwd().startswith('/google/cog/cloud'):
     86      files = []
     87      for root, _, filenames in os.walk(directory):
     88        files.extend([os.path.join(root, f) for f in filenames])
     89      return set(files)
     90    cmd = [get_git_command_name(), 'ls-files', '--error-unmatch', directory]
     91    with subprocess.Popen(cmd,
     92                          stdout=subprocess.PIPE,
     93                          stderr=subprocess.PIPE,
     94                          cwd=globroot) as p:
     95      output = p.communicate()
     96      if p.returncode != 0:
     97        if verbose:
     98          print_error(
     99              f'Could not gather a list of tracked files in {directory}',
    100              f'{output[1]}')
    101        return set()
    102 
    103      files = [f.decode('utf-8') for f in output[0].splitlines()]
    104 
    105      # Need paths to be relative to directory in order to match expansions.
    106      # This should happen naturally due to cwd above, but we need to take
    107      # special care if relative to the repository root.
    108      if repository_root_relative:
    109        files = ['//' + f for f in files]
    110 
    111      # Handle Windows backslashes
    112      files = [f.replace('\\', '/') for f in files]
    113 
    114      return set(files)
    115 
    116  except Exception as e:
    117    if verbose:
    118      print_error(f'Could not gather a list of tracked files in {directory}',
    119                  f'{type(e)}: {e}')
    120    return set()
    121 
    122 
    123 def combine_potentially_repository_root_relative_paths(a, b):
    124  if b.startswith('//'):
    125    # If b is relative to the repository root, os.path will consider it absolute
    126    # and os.path.join will fail. In this case, we can simply concatenate the
    127    # paths.
    128    return (a + b, True)
    129  else:
    130    return (os.path.join(a, b), False)
    131 
    132 
    133 def parse_and_expand_globlist(globlist_name, glob_root):
    134  # The following expects glob_root not to end in a trailing slash.
    135  if glob_root.endswith('/'):
    136    glob_root = glob_root[:-1]
    137 
    138  check_expansions_outside_globlist_dir = True
    139  globlist_dir = os.path.dirname(globlist_name)
    140 
    141  with open(globlist_name) as globlist:
    142    # Paths in |files| and |to_check| must use unix separators. Using a set
    143    # ensures no unwanted duplicates. The files in |to_check| must be in the
    144    # globroot or a subdirectory.
    145    files = set()
    146    to_check = set()
    147    for g in globlist:
    148      g = g.strip()
    149 
    150      # Ignore blank lines
    151      if not g:
    152        continue
    153 
    154      # Toggle error checking.
    155      if g == _START_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
    156        check_expansions_outside_globlist_dir = False
    157      elif g == _STOP_IGNORE_EXPANSIONS_OUTSIDE_GLOBLIST_DIR:
    158        check_expansions_outside_globlist_dir = True
    159 
    160      # Ignore comments.
    161      if not g or g.startswith('#'):
    162        continue
    163 
    164      # Exclusions are prefixed with '-'.
    165      is_exclusion = g.startswith('-')
    166      if is_exclusion:
    167        g = g[1:]
    168 
    169      (combined,
    170       root_relative) = combine_potentially_repository_root_relative_paths(
    171           glob_root, g)
    172 
    173      prefix_size = len(glob_root)
    174      if not root_relative:
    175        # We need to account for the separator.
    176        prefix_size += 1
    177 
    178      expansion = glob.glob(combined, recursive=True)
    179 
    180      # Filter out directories.
    181      expansion = [f for f in expansion if os.path.isfile(f)]
    182 
    183      if check_expansions_outside_globlist_dir:
    184        for f in expansion:
    185          relative = os.path.relpath(f, globlist_dir)
    186          if relative.startswith('..'):
    187            raise Exception(f'Globlist expansion outside globlist dir: {f}')
    188 
    189      # Make relative to |glob_root|.
    190      expansion = [f[prefix_size:] for f in expansion]
    191 
    192      # Handle Windows backslashes
    193      expansion = [f.replace('\\', '/') for f in expansion]
    194 
    195      # Since paths in |expansion| only use unix separators, it is safe to
    196      # compare for both the purpose of exclusion and addition.
    197      if is_exclusion:
    198        files = files.difference(expansion)
    199      else:
    200        files = files.union(expansion)
    201 
    202    # Return a sorted list.
    203    return sorted(files)
    204 
    205 
    206 def compare_lists(a, b):
    207  differ = difflib.Differ()
    208  full_diff = differ.compare(a, b)
    209  lines = [d for d in full_diff if not d.startswith('  ')]
    210  additions = [l[2:] for l in lines if l.startswith('+ ')]
    211  removals = [l[2:] for l in lines if l.startswith('- ')]
    212  return (additions, removals)
    213 
    214 
    215 def write_filelist(filelist_name, files, header):
    216  try:
    217    with open(filelist_name, 'w', encoding='utf-8', newline='') as filelist:
    218      if not _HEADER_PATTERN.search(header):
    219        header = _HEADER
    220      filelist.write(header)
    221      for file in files:
    222        filelist.write(f'{file}\n')
    223  except Exception as e:
    224    print_error(f'Could not write file list: {filelist_name}',
    225                f'{type(e)}: {e}')
    226    return []
    227 
    228 
    229 def process_filelist(filelist, globlist, globroot, check=False, verbose=False):
    230  files_from_globlist = []
    231  try:
    232    files_from_globlist = parse_and_expand_globlist(globlist, globroot)
    233  except Exception as e:
    234    if verbose:
    235      print_error(f'Could not read glob list: {globlist}', f'{type(e)}: {e}')
    236    return 1
    237 
    238  (files, header) = parse_filelist(filelist)
    239 
    240  (additions, removals) = compare_lists(files, files_from_globlist)
    241  to_ignore = set()
    242 
    243  # Ignore additions of untracked files.
    244  if additions:
    245    directories = set([os.path.dirname(f) for f in additions])
    246    tracked_files = set()
    247    for d in directories:
    248      (combined,
    249       root_relative) = combine_potentially_repository_root_relative_paths(
    250           globroot, d)
    251      relative = os.path.relpath(combined, globroot)
    252      tracked_files = tracked_files.union(
    253          get_tracked_files(relative, globroot, root_relative, verbose))
    254    to_ignore = set(additions).difference(tracked_files)
    255    additions = [f for f in additions if f in tracked_files]
    256 
    257  files_from_globlist = [f for f in files_from_globlist if f not in to_ignore]
    258 
    259  if check:
    260    if not _HEADER_PATTERN.search(header):
    261      if verbose:
    262        print_error(f'Unexpected header for {filelist}', f'{header}')
    263      return 1
    264    if not additions and not removals:
    265      return 0
    266    if verbose:
    267      pretty_additions = ['+ ' + f for f in additions]
    268      pretty_removals = ['- ' + f for f in removals]
    269      pretty_diff = '\n'.join(pretty_additions + pretty_removals)
    270      print_error('File list does not match glob expansion', f'{pretty_diff}')
    271    return 1
    272  else:
    273    write_filelist(filelist, files_from_globlist, header)
    274    return 0
    275 
    276 
    277 def main(args):
    278  parser = argparse.ArgumentParser(
    279      description=__doc__, formatter_class=argparse.RawTextHelpFormatter)
    280  parser.add_argument('filelist', help='Contains one file per line')
    281  parser.add_argument('globlist',
    282                      help='Contains globs that, when expanded, '
    283                      'should match the filelist. Use '
    284                      '--help for details on syntax')
    285  parser.add_argument('globroot',
    286                      help='Directory from which globs are relative')
    287  parser.add_argument('-c',
    288                      '--check',
    289                      action='store_true',
    290                      help='Prevents modifying the file list')
    291  parser.add_argument('-v',
    292                      '--verbose',
    293                      action='store_true',
    294                      help='Use this to print details on differences')
    295  args = parser.parse_args()
    296  return process_filelist(args.filelist,
    297                          args.globlist,
    298                          args.globroot,
    299                          check=args.check,
    300                          verbose=args.verbose)
    301 
    302 
    303 def print_error(error_message, error_info):
    304  """ Print the `error_message` with additional `error_info` """
    305  color_start, color_end = adapted_color_for_output(TERMINAL_ERROR_COLOR,
    306                                                    TERMINAL_RESET_COLOR)
    307 
    308  error_message = color_start + 'ERROR: ' + error_message + color_end
    309  if len(error_info) > 0:
    310    error_message = error_message + '\n' + error_info
    311  print(error_message, file=sys.stderr)
    312 
    313 
    314 def adapted_color_for_output(color_start, color_end):
    315  """ Returns a the `color_start`, `color_end` tuple if the output is a
    316    terminal, or empty strings otherwise """
    317  if not sys.stdout.isatty():
    318    return '', ''
    319  return color_start, color_end
    320 
    321 
    322 if __name__ == '__main__':
    323  sys.exit(main(sys.argv[1:]))