tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

binary_sizes.py (21279B)


      1 #!/usr/bin/env vpython3
      2 #
      3 # Copyright 2020 The Chromium Authors
      4 # Use of this source code is governed by a BSD-style license that can be
      5 # found in the LICENSE file.
      6 '''Implements Chrome-Fuchsia package binary size checks.'''
      7 
      8 import argparse
      9 import collections
     10 import json
     11 import math
     12 import os
     13 import re
     14 import shutil
     15 import subprocess
     16 import sys
     17 import tempfile
     18 import time
     19 import traceback
     20 import uuid
     21 
     22 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__),
     23                                             'test')))
     24 
     25 from common import DIR_SRC_ROOT, SDK_ROOT, get_host_tool_path
     26 
# Output file names used when writing results under the test results directory.
PACKAGES_BLOBS_FILE = 'package_blobs.json'
PACKAGES_SIZES_FILE = 'package_sizes.json'

# Structure representing the compressed and uncompressed sizes for a Fuchsia
# package.
PackageSizes = collections.namedtuple('PackageSizes',
                                      ['compressed', 'uncompressed'])

# Structure representing a Fuchsia package blob and its compressed and
# uncompressed sizes.  |is_counted| is False for blobs excluded from size
# budgets (e.g. SDK-distributed libraries and ICU data).
Blob = collections.namedtuple(
    'Blob', ['name', 'hash', 'compressed', 'uncompressed', 'is_counted'])
     39 
     40 
     41 def CreateSizesExternalDiagnostic(sizes_guid):
     42  """Creates a histogram external sizes diagnostic."""
     43 
     44  benchmark_diagnostic = {
     45      'type': 'GenericSet',
     46      'guid': str(sizes_guid),
     47      'values': ['sizes'],
     48  }
     49 
     50  return benchmark_diagnostic
     51 
     52 
     53 def CreateSizesHistogramItem(name, size, sizes_guid):
     54  """Create a performance dashboard histogram from the histogram template and
     55  binary size data."""
     56 
     57  # Chromium performance dashboard histogram containing binary size data.
     58  histogram = {
     59      'name': name,
     60      'unit': 'sizeInBytes_smallerIsBetter',
     61      'diagnostics': {
     62          'benchmarks': str(sizes_guid),
     63      },
     64      'sampleValues': [size],
     65      'running': [1, size, math.log(size), size, size, size, 0],
     66      'description': 'chrome-fuchsia package binary sizes',
     67      'summaryOptions': {
     68          'avg': True,
     69          'count': False,
     70          'max': False,
     71          'min': False,
     72          'std': False,
     73          'sum': False,
     74      },
     75  }
     76 
     77  return histogram
     78 
     79 
     80 def CreateSizesHistogram(package_sizes):
     81  """Create a performance dashboard histogram from binary size data."""
     82 
     83  sizes_guid = uuid.uuid1()
     84  histogram = [CreateSizesExternalDiagnostic(sizes_guid)]
     85  for name, size in package_sizes.items():
     86    histogram.append(
     87        CreateSizesHistogramItem('%s_%s' % (name, 'compressed'),
     88                                 size.compressed, sizes_guid))
     89    histogram.append(
     90        CreateSizesHistogramItem('%s_%s' % (name, 'uncompressed'),
     91                                 size.uncompressed, sizes_guid))
     92  return histogram
     93 
     94 
     95 def CreateTestResults(test_status, timestamp):
     96  """Create test results data to write to JSON test results file.
     97 
     98  The JSON data format is defined in
     99  https://chromium.googlesource.com/chromium/src/+/main/docs/testing/json_test_results_format.md
    100  """
    101 
    102  results = {
    103      'tests': {},
    104      'interrupted': False,
    105      'metadata': {
    106          'test_name_prefix': 'build/fuchsia/'
    107      },
    108      'version': 3,
    109      'seconds_since_epoch': timestamp,
    110  }
    111 
    112  num_failures_by_type = {result: 0 for result in ['FAIL', 'PASS', 'CRASH']}
    113  for metric in test_status:
    114    actual_status = test_status[metric]
    115    num_failures_by_type[actual_status] += 1
    116    results['tests'][metric] = {
    117        'expected': 'PASS',
    118        'actual': actual_status,
    119    }
    120  results['num_failures_by_type'] = num_failures_by_type
    121 
    122  return results
    123 
    124 
    125 def GetTestStatus(package_sizes, sizes_config, test_completed):
    126  """Checks package sizes against size limits.
    127 
    128  Returns a tuple of overall test pass/fail status and a dictionary mapping size
    129  limit checks to PASS/FAIL/CRASH status."""
    130 
    131  if not test_completed:
    132    test_status = {'binary_sizes': 'CRASH'}
    133  else:
    134    test_status = {}
    135    for metric, limit in sizes_config['size_limits'].items():
    136      # Strip the "_compressed" suffix from |metric| if it exists.
    137      match = re.match(r'(?P<name>\w+)_compressed', metric)
    138      package_name = match.group('name') if match else metric
    139      if package_name not in package_sizes:
    140        raise Exception('package "%s" not in sizes "%s"' %
    141                        (package_name, str(package_sizes)))
    142      if package_sizes[package_name].compressed <= limit:
    143        test_status[metric] = 'PASS'
    144      else:
    145        test_status[metric] = 'FAIL'
    146 
    147  all_tests_passed = all(status == 'PASS' for status in test_status.values())
    148 
    149  return all_tests_passed, test_status
    150 
    151 
    152 def WriteSimpleTestResults(results_path, test_completed):
    153  """Writes simplified test results file.
    154 
    155  Used when test status is not available.
    156  """
    157 
    158  simple_isolated_script_output = {
    159      'valid': test_completed,
    160      'failures': [],
    161      'version': 'simplified',
    162  }
    163  with open(results_path, 'w') as output_file:
    164    json.dump(simple_isolated_script_output, output_file)
    165 
    166 
    167 def WriteTestResults(results_path, test_completed, test_status, timestamp):
    168  """Writes test results file containing test PASS/FAIL/CRASH statuses."""
    169 
    170  if test_status:
    171    test_results = CreateTestResults(test_status, timestamp)
    172    with open(results_path, 'w') as results_file:
    173      json.dump(test_results, results_file)
    174  else:
    175    WriteSimpleTestResults(results_path, test_completed)
    176 
    177 
    178 def WriteGerritPluginSizeData(output_path, package_sizes):
    179  """Writes a package size dictionary in json format for the Gerrit binary
    180  sizes plugin."""
    181 
    182  with open(output_path, 'w') as sizes_file:
    183    sizes_data = {name: size.compressed for name, size in package_sizes.items()}
    184    json.dump(sizes_data, sizes_file)
    185 
    186 
    187 def ReadPackageBlobsJson(json_path):
    188  """Reads package blob info from json file.
    189 
    190  Opens json file of blob info written by WritePackageBlobsJson,
    191  and converts back into package blobs used in this script.
    192  """
    193  with open(json_path, 'rt') as json_file:
    194    formatted_blob_info = json.load(json_file)
    195 
    196  package_blobs = {}
    197  for package in formatted_blob_info:
    198    package_blobs[package] = {}
    199    for blob_info in formatted_blob_info[package]:
    200      blob = Blob(name=blob_info['path'],
    201                  hash=blob_info['merkle'],
    202                  uncompressed=blob_info['bytes'],
    203                  compressed=blob_info['size'],
    204                  is_counted=blob_info['is_counted'])
    205      package_blobs[package][blob.name] = blob
    206 
    207  return package_blobs
    208 
    209 
    210 def WritePackageBlobsJson(json_path, package_blobs):
    211  """Writes package blob information in human-readable JSON format.
    212 
    213  The json data is an array of objects containing these keys:
    214    'path': string giving blob location in the local file system
    215    'merkle': the blob's Merkle hash
    216    'bytes': the number of uncompressed bytes in the blod
    217    'size': the size of the compressed blob in bytes.  A multiple of the blobfs
    218        block size (8192)
    219    'is_counted: true if the blob counts towards the package budget, or false
    220        if not (for ICU blobs or blobs distributed in the SDK)"""
    221 
    222  formatted_blob_stats_per_package = {}
    223  for package in package_blobs:
    224    blob_data = []
    225    for blob_name in package_blobs[package]:
    226      blob = package_blobs[package][blob_name]
    227      blob_data.append({
    228          'path': str(blob.name),
    229          'merkle': str(blob.hash),
    230          'bytes': blob.uncompressed,
    231          'size': blob.compressed,
    232          'is_counted': blob.is_counted
    233      })
    234    formatted_blob_stats_per_package[package] = blob_data
    235 
    236  with (open(json_path, 'w')) as json_file:
    237    json.dump(formatted_blob_stats_per_package, json_file, indent=2)
    238 
    239 
    240 def WritePackageSizesJson(json_path, package_sizes):
    241  """Writes package sizes into a human-readable JSON format.
    242 
    243  JSON data is a dictionary of each package name being a key, with
    244  the following keys within the sub-object:
    245    'compressed': compressed size of the package in bytes.
    246    'uncompressed': uncompressed size of the package in bytes.
    247  """
    248  formatted_package_sizes = {}
    249  for package, size_info in package_sizes.items():
    250    formatted_package_sizes[package] = {
    251        'uncompressed': size_info.uncompressed,
    252        'compressed': size_info.compressed
    253    }
    254  with (open(json_path, 'w')) as json_file:
    255    json.dump(formatted_package_sizes, json_file, indent=2)
    256 
    257 
    258 def ReadPackageSizesJson(json_path):
    259  """Reads package_sizes from a given JSON file.
    260 
    261  Opens json file of blob info written by WritePackageSizesJson,
    262  and converts back into package sizes used in this script.
    263  """
    264  with open(json_path, 'rt') as json_file:
    265    formatted_package_info = json.load(json_file)
    266 
    267  package_sizes = {}
    268  for package, size_info in formatted_package_info.items():
    269    package_sizes[package] = PackageSizes(
    270        compressed=size_info['compressed'],
    271        uncompressed=size_info['uncompressed'])
    272  return package_sizes
    273 
    274 
    275 def GetCompressedSize(file_path):
    276  """Measures file size after blobfs compression."""
    277 
    278  compressor_path = get_host_tool_path('blobfs-compression')
    279  try:
    280    temp_dir = tempfile.mkdtemp()
    281    compressed_file_path = os.path.join(temp_dir, os.path.basename(file_path))
    282    compressor_cmd = [
    283        compressor_path,
    284        '--source_file=%s' % file_path,
    285        '--compressed_file=%s' % compressed_file_path
    286    ]
    287    proc = subprocess.Popen(compressor_cmd,
    288                            stdout=subprocess.PIPE,
    289                            stderr=subprocess.STDOUT)
    290    proc.wait()
    291    compressor_output = proc.stdout.read().decode('utf-8')
    292    if proc.returncode != 0:
    293      print(compressor_output, file=sys.stderr)
    294      raise Exception('Error while running %s' % compressor_path)
    295  finally:
    296    shutil.rmtree(temp_dir)
    297 
    298  # Match a compressed bytes total from blobfs-compression output like
    299  # Wrote 360830 bytes (40% compression)
    300  blobfs_compressed_bytes_re = r'Wrote\s+(?P<bytes>\d+)\s+bytes'
    301 
    302  match = re.search(blobfs_compressed_bytes_re, compressor_output)
    303  if not match:
    304    print(compressor_output, file=sys.stderr)
    305    raise Exception('Could not get compressed bytes for %s' % file_path)
    306 
    307  # Round the compressed file size up to an integer number of blobfs blocks.
    308  BLOBFS_BLOCK_SIZE = 8192  # Fuchsia's blobfs file system uses 8KiB blocks.
    309  blob_bytes = int(match.group('bytes'))
    310  return int(math.ceil(blob_bytes / BLOBFS_BLOCK_SIZE)) * BLOBFS_BLOCK_SIZE
    311 
    312 
    313 def ExtractFarFile(file_path, extract_dir):
    314  """Extracts contents of a Fuchsia archive file to the specified directory."""
    315 
    316  far_tool = get_host_tool_path('far')
    317 
    318  if not os.path.isfile(far_tool):
    319    raise Exception('Could not find FAR host tool "%s".' % far_tool)
    320  if not os.path.isfile(file_path):
    321    raise Exception('Could not find FAR file "%s".' % file_path)
    322 
    323  subprocess.check_call([
    324      far_tool, 'extract',
    325      '--archive=%s' % file_path,
    326      '--output=%s' % extract_dir
    327  ])
    328 
    329 
    330 def GetBlobNameHashes(meta_dir):
    331  """Returns mapping from Fuchsia pkgfs paths to blob hashes.  The mapping is
    332  read from the extracted meta.far archive contained in an extracted package
    333  archive."""
    334 
    335  blob_name_hashes = {}
    336  contents_path = os.path.join(meta_dir, 'meta', 'contents')
    337  with open(contents_path) as lines:
    338    for line in lines:
    339      (pkgfs_path, blob_hash) = line.strip().split('=')
    340      blob_name_hashes[pkgfs_path] = blob_hash
    341  return blob_name_hashes
    342 
    343 
# Compiled regular expression matching shared object file names like
# *.so, *.so.1, *.so.2, ...
SO_FILENAME_REGEXP = re.compile(r'\.so(\.\d+)?$')
    346 
    347 
    348 def GetSdkModules():
    349  """Finds shared objects (.so) under the Fuchsia SDK arch directory in dist or
    350  lib subdirectories.
    351 
    352  Returns a set of shared objects' filenames.
    353  """
    354 
    355  # Fuchsia SDK arch directory path (contains all shared object files).
    356  sdk_arch_dir = os.path.join(SDK_ROOT, 'arch')
    357  # Leaf subdirectories containing shared object files.
    358  sdk_so_leaf_dirs = ['dist', 'lib']
    359  # Match a shared object file name.
    360  sdk_so_filename_re = r'\.so(\.\d+)?$'
    361 
    362  lib_names = set()
    363  for dirpath, _, file_names in os.walk(sdk_arch_dir):
    364    if os.path.basename(dirpath) in sdk_so_leaf_dirs:
    365      for name in file_names:
    366        if SO_FILENAME_REGEXP.search(name):
    367          lib_names.add(name)
    368  return lib_names
    369 
    370 
    371 def FarBaseName(name):
    372  _, name = os.path.split(name)
    373  name = re.sub(r'\.far$', '', name)
    374  return name
    375 
    376 
    377 def GetPackageMerkleRoot(far_file_path):
    378  """Returns a package's Merkle digest."""
    379 
    380  # The digest is the first word on the first line of the merkle tool's output.
    381  merkle_tool = get_host_tool_path('merkleroot')
    382  output = subprocess.check_output([merkle_tool, far_file_path])
    383  return output.splitlines()[0].split()[0]
    384 
    385 
    386 def GetBlobs(far_file, build_out_dir):
    387  """Calculates compressed and uncompressed blob sizes for specified FAR file.
    388  Marks ICU blobs and blobs from SDK libraries as not counted."""
    389 
    390  base_name = FarBaseName(far_file)
    391 
    392  extract_dir = tempfile.mkdtemp()
    393 
    394  # Extract files and blobs from the specified Fuchsia archive.
    395  far_file_path = os.path.join(build_out_dir, far_file)
    396  far_extract_dir = os.path.join(extract_dir, base_name)
    397  ExtractFarFile(far_file_path, far_extract_dir)
    398 
    399  # Extract the meta.far archive contained in the specified Fuchsia archive.
    400  meta_far_file_path = os.path.join(far_extract_dir, 'meta.far')
    401  meta_far_extract_dir = os.path.join(extract_dir, '%s_meta' % base_name)
    402  ExtractFarFile(meta_far_file_path, meta_far_extract_dir)
    403 
    404  # Map Linux filesystem blob names to blob hashes.
    405  blob_name_hashes = GetBlobNameHashes(meta_far_extract_dir)
    406 
    407  # "System" files whose sizes are not charged against component size budgets.
    408  # Fuchsia SDK modules and the ICU icudtl.dat file sizes are not counted.
    409  system_files = GetSdkModules() | set(['icudtl.dat'])
    410 
    411  # Add the meta.far file blob.
    412  blobs = {}
    413  meta_name = 'meta.far'
    414  meta_hash = GetPackageMerkleRoot(meta_far_file_path)
    415  compressed = GetCompressedSize(meta_far_file_path)
    416  uncompressed = os.path.getsize(meta_far_file_path)
    417  blobs[meta_name] = Blob(meta_name, meta_hash, compressed, uncompressed, True)
    418 
    419  # Add package blobs.
    420  for blob_name, blob_hash in blob_name_hashes.items():
    421    extracted_blob_path = os.path.join(far_extract_dir, blob_hash)
    422    compressed = GetCompressedSize(extracted_blob_path)
    423    uncompressed = os.path.getsize(extracted_blob_path)
    424    is_counted = os.path.basename(blob_name) not in system_files
    425    blobs[blob_name] = Blob(blob_name, blob_hash, compressed, uncompressed,
    426                            is_counted)
    427 
    428  shutil.rmtree(extract_dir)
    429 
    430  return blobs
    431 
    432 
    433 def GetPackageBlobs(far_files, build_out_dir):
    434  """Returns dictionary mapping package names to blobs contained in the package.
    435 
    436  Prints package blob size statistics."""
    437 
    438  package_blobs = {}
    439  for far_file in far_files:
    440    package_name = FarBaseName(far_file)
    441    if package_name in package_blobs:
    442      raise Exception('Duplicate FAR file base name "%s".' % package_name)
    443    package_blobs[package_name] = GetBlobs(far_file, build_out_dir)
    444 
    445  # Print package blob sizes (does not count sharing).
    446  for package_name in sorted(package_blobs.keys()):
    447    print('Package blob sizes: %s' % package_name)
    448    print('%-64s %12s %12s %s' %
    449          ('blob hash', 'compressed', 'uncompressed', 'path'))
    450    print('%s %s %s %s' % (64 * '-', 12 * '-', 12 * '-', 20 * '-'))
    451    for blob_name in sorted(package_blobs[package_name].keys()):
    452      blob = package_blobs[package_name][blob_name]
    453      if blob.is_counted:
    454        print('%64s %12d %12d %s' %
    455              (blob.hash, blob.compressed, blob.uncompressed, blob.name))
    456 
    457  return package_blobs
    458 
    459 
    460 def GetPackageSizes(package_blobs):
    461  """Calculates compressed and uncompressed package sizes from blob sizes."""
    462 
    463  # TODO(crbug.com/40718363): Use partial sizes for blobs shared by
    464  # non Chrome-Fuchsia packages.
    465 
    466  # Count number of packages sharing blobs (a count of 1 is not shared).
    467  blob_counts = collections.defaultdict(int)
    468  for package_name in package_blobs:
    469    for blob_name in package_blobs[package_name]:
    470      blob = package_blobs[package_name][blob_name]
    471      blob_counts[blob.hash] += 1
    472 
    473  # Package sizes are the sum of blob sizes divided by their share counts.
    474  package_sizes = {}
    475  for package_name in package_blobs:
    476    compressed_total = 0
    477    uncompressed_total = 0
    478    for blob_name in package_blobs[package_name]:
    479      blob = package_blobs[package_name][blob_name]
    480      if blob.is_counted:
    481        count = blob_counts[blob.hash]
    482        compressed_total += blob.compressed // count
    483        uncompressed_total += blob.uncompressed // count
    484    package_sizes[package_name] = PackageSizes(compressed_total,
    485                                               uncompressed_total)
    486 
    487  return package_sizes
    488 
    489 
    490 def GetBinarySizesAndBlobs(args, sizes_config):
    491  """Get binary size data and contained blobs for packages specified in args.
    492 
    493  If "total_size_name" is set, then computes a synthetic package size which is
    494  the aggregated sizes across all packages."""
    495 
    496  # Calculate compressed and uncompressed package sizes.
    497  package_blobs = GetPackageBlobs(sizes_config['far_files'], args.build_out_dir)
    498  package_sizes = GetPackageSizes(package_blobs)
    499 
    500  # Optionally calculate total compressed and uncompressed package sizes.
    501  if 'far_total_name' in sizes_config:
    502    compressed = sum([a.compressed for a in package_sizes.values()])
    503    uncompressed = sum([a.uncompressed for a in package_sizes.values()])
    504    package_sizes[sizes_config['far_total_name']] = PackageSizes(
    505        compressed, uncompressed)
    506 
    507  for name, size in package_sizes.items():
    508    print('%s: compressed size %d, uncompressed size %d' %
    509          (name, size.compressed, size.uncompressed))
    510 
    511  return package_sizes, package_blobs
    512 
    513 
    514 def main():
    515  parser = argparse.ArgumentParser()
    516  parser.add_argument(
    517      '--build-out-dir',
    518      '--output-directory',
    519      type=os.path.realpath,
    520      required=True,
    521      help='Location of the build artifacts.',
    522  )
    523  parser.add_argument(
    524      '--isolated-script-test-output',
    525      type=os.path.realpath,
    526      help='File to which simplified JSON results will be written.')
    527  parser.add_argument(
    528      '--size-plugin-json-path',
    529      help='Optional path for json size data for the Gerrit binary size plugin',
    530  )
    531  parser.add_argument(
    532      '--sizes-path',
    533      default=os.path.join('tools', 'fuchsia', 'size_tests', 'fyi_sizes.json'),
    534      help='path to package size limits json file.  The path is relative to '
    535      'the workspace src directory')
    536  parser.add_argument('--verbose',
    537                      '-v',
    538                      action='store_true',
    539                      help='Enable verbose output')
    540  # Accepted to conform to the isolated script interface, but ignored.
    541  parser.add_argument('--isolated-script-test-filter', help=argparse.SUPPRESS)
    542  parser.add_argument('--isolated-script-test-perf-output',
    543                      help=argparse.SUPPRESS)
    544  args = parser.parse_args()
    545 
    546  if args.verbose:
    547    print('Fuchsia binary sizes')
    548    print('Working directory', os.getcwd())
    549    print('Args:')
    550    for var in vars(args):
    551      print('  {}: {}'.format(var, getattr(args, var) or ''))
    552 
    553  if not os.path.isdir(args.build_out_dir):
    554    raise Exception('Could not find build output directory "%s".' %
    555                    args.build_out_dir)
    556 
    557  with open(os.path.join(DIR_SRC_ROOT, args.sizes_path)) as sizes_file:
    558    sizes_config = json.load(sizes_file)
    559 
    560  if args.verbose:
    561    print('Sizes Config:')
    562    print(json.dumps(sizes_config))
    563 
    564  for far_rel_path in sizes_config['far_files']:
    565    far_abs_path = os.path.join(args.build_out_dir, far_rel_path)
    566    if not os.path.isfile(far_abs_path):
    567      raise Exception('Could not find FAR file "%s".' % far_abs_path)
    568 
    569  test_name = 'sizes'
    570  timestamp = time.time()
    571  test_completed = False
    572  all_tests_passed = False
    573  test_status = {}
    574  package_sizes = {}
    575  package_blobs = {}
    576  sizes_histogram = []
    577 
    578  results_directory = None
    579  if args.isolated_script_test_output:
    580    results_directory = os.path.join(
    581        os.path.dirname(args.isolated_script_test_output), test_name)
    582    if not os.path.exists(results_directory):
    583      os.makedirs(results_directory)
    584 
    585  try:
    586    package_sizes, package_blobs = GetBinarySizesAndBlobs(args, sizes_config)
    587    sizes_histogram = CreateSizesHistogram(package_sizes)
    588    test_completed = True
    589  except:
    590    _, value, trace = sys.exc_info()
    591    traceback.print_tb(trace)
    592    print(str(value))
    593  finally:
    594    all_tests_passed, test_status = GetTestStatus(package_sizes, sizes_config,
    595                                                  test_completed)
    596 
    597    if results_directory:
    598      WriteTestResults(os.path.join(results_directory, 'test_results.json'),
    599                       test_completed, test_status, timestamp)
    600      with open(os.path.join(results_directory, 'perf_results.json'), 'w') as f:
    601        json.dump(sizes_histogram, f)
    602      WritePackageBlobsJson(
    603          os.path.join(results_directory, PACKAGES_BLOBS_FILE), package_blobs)
    604      WritePackageSizesJson(
    605          os.path.join(results_directory, PACKAGES_SIZES_FILE), package_sizes)
    606 
    607    if args.isolated_script_test_output:
    608      WriteTestResults(args.isolated_script_test_output, test_completed,
    609                       test_status, timestamp)
    610 
    611    if args.size_plugin_json_path:
    612      WriteGerritPluginSizeData(args.size_plugin_json_path, package_sizes)
    613 
    614    return 0 if all_tests_passed else 1
    615 
    616 
# Script entry point: process exit status is 0 when all size checks pass,
# 1 otherwise (see main()).
if __name__ == '__main__':
  sys.exit(main())