tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lacros_resource_sizes.py (14444B)


      1 #!/usr/bin/env python3
      2 # Copyright 2020 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 """Reports binary size metrics for LaCrOS build artifacts.
      6 
      7 More information at //docs/speed/binary_size/metrics.md.
      8 """
      9 
     10 import argparse
     11 import collections
     12 import contextlib
     13 import json
     14 import logging
     15 import os
     16 import subprocess
     17 import sys
     18 import tempfile
     19 SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
     20 sys.path.insert(0, os.path.join(SRC_DIR, 'build', 'util'))
     21 from lib.results import result_sink
     22 from lib.results import result_types
     23 
     24 
     25 @contextlib.contextmanager
     26 def _SysPath(path):
     27  """Library import context that temporarily appends |path| to |sys.path|."""
     28  if path and path not in sys.path:
     29    sys.path.insert(0, path)
     30  else:
     31    path = None  # Indicates that |sys.path| is not modified.
     32  try:
     33    yield
     34  finally:
     35    if path:
     36      sys.path.pop(0)
     37 
     38 
# Root of the Chromium checkout. Bots may override the default (two levels up
# from this file) via the CHECKOUT_SOURCE_ROOT environment variable.
DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

BUILD_UTIL_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util')

# Catapult tracing library, used below to convert chartjson into histograms.
TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

# eu-strip binary used to measure stripped ELF sizes.
EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

with _SysPath(BUILD_UTIL_PATH):
  from lib.common import perf_tests_results_helper  # pylint: disable=import-error

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

# Skeleton for the chartjson output; each run copies this and fills 'charts'.
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'trace_rerun_options': [],
    'charts': {}
}

# Counter keys for the per-file measurements (see _get_catagorized_filesizes).
_KEY_RAW = 'raw'
_KEY_GZIPPED = 'gzipped'
_KEY_STRIPPED = 'stripped'
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'
     70 
     71 class _Group:
     72  """A group of build artifacts whose file sizes are summed and tracked.
     73 
     74  Build artifacts for size tracking fall under these categories:
     75  * File: A single file.
     76  * Group: A collection of files.
     77  * Dir: All files under a directory.
     78 
     79  Attributes:
     80    paths: A list of files or directories to be tracked together.
     81    title: The display name of the group.
     82    track_stripped: Whether to also track summed stripped ELF sizes.
     83    track_compressed: Whether to also track summed compressed sizes.
     84  """
     85 
     86  def __init__(self, paths, title, track_stripped=False,
     87               track_compressed=False):
     88    self.paths = paths
     89    self.title = title
     90    self.track_stripped = track_stripped
     91    self.track_compressed = track_compressed
     92 
     93  def __eq__(self, other):
     94    """Overrides the default implementation"""
     95    if isinstance(other, _Group):
     96      return (self.paths == other.paths) & (self.title == other.title) & (
     97          self.track_stripped == other.track_stripped) & (
     98              self.track_compressed == other.track_compressed)
     99    return False
    100 
# Common artifacts in official builder lacros-arm32 and lacros64 in
# src-internal. The artifacts can be found in
# chromium/src-internal/testing/buildbot/archive/lacros64.json and
# chromium/src-internal/testing/buildbot/archive/lacros-arm32.json
# chromium/src-internal/testing/buildbot/archive/lacros-arm64.json
_TRACKED_GROUPS = [
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['icudtl.dat.hash'], title='File: icudtl.dat.hash'),
    _Group(paths=['libEGL.so'], title='File: libEGL.so'),
    _Group(paths=['libGLESv2.so'], title='File: libGLESv2.so'),
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak',
        'headless_lib_data.pak', 'headless_lib_strings.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['PrivacySandboxAttestationsPreloaded/'],
           title='Dir: PrivacySandboxAttestationsPreloaded'),
    _Group(paths=['resources/accessibility/'],
           title='Dir: resources/accessibility'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]
    132 
    133 
    134 def _visit_paths(base_dir, paths):
    135  """Itemizes files specified by a list of paths.
    136 
    137  Args:
    138    base_dir: Base directory for all elements in |paths|.
    139    paths: A list of filenames or directory names to specify files whose sizes
    140      to be counted. Directories are recursed. There's no de-duping effort.
    141      Non-existing files or directories are ignored (with warning message).
    142  """
    143  for path in paths:
    144    full_path = os.path.join(base_dir, path)
    145    if os.path.exists(full_path):
    146      if os.path.isdir(full_path):
    147        for dirpath, _, filenames in os.walk(full_path):
    148          for filename in filenames:
    149            yield os.path.join(dirpath, filename)
    150      else:  # Assume is file.
    151        yield full_path
    152    else:
    153      logging.critical('Not found: %s', path)
    154 
    155 
    156 def _is_probably_elf(filename):
    157  """Heuristically decides whether |filename| is ELF via magic signature."""
    158  with open(filename, 'rb') as fh:
    159    return fh.read(4) == '\x7FELF'
    160 
    161 
    162 def _is_unstrippable_elf(filename):
    163  """Identifies known-unstrippable ELF files to denoise the system."""
    164  return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')
    165 
    166 
    167 def _get_filesize(filename):
    168  """Returns the size of a file, or 0 if file is not found."""
    169  try:
    170    return os.path.getsize(filename)
    171  except OSError:
    172    logging.critical('Failed to get size: %s', filename)
    173  return 0
    174 
    175 
    176 def _get_gzipped_filesize(filename):
    177  """Returns the gzipped size of a file, or 0 if file is not found."""
    178  BUFFER_SIZE = 65536
    179  if not os.path.isfile(filename):
    180    return 0
    181  try:
    182    # Call gzip externally instead of using gzip package since it's > 2x faster.
    183    cmd = ['gzip', '-c', filename]
    184    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    185    # Manually counting bytes instead of using len(p.communicate()[0]) to avoid
    186    # buffering the entire compressed data (can be ~100 MB).
    187    ret = 0
    188    while True:
    189      chunk = len(p.stdout.read(BUFFER_SIZE))
    190      if chunk == 0:
    191        break
    192      ret += chunk
    193    return ret
    194  except OSError:
    195    logging.critical('Failed to get gzipped size: %s', filename)
    196  return 0
    197 
    198 
    199 def _get_catagorized_filesizes(filename):
    200  """Measures |filename| sizes under various transforms.
    201 
    202  Returns: A Counter (keyed by _Key_* constants) that stores measured sizes.
    203  """
    204  sizes = collections.Counter()
    205  sizes[_KEY_RAW] = _get_filesize(filename)
    206  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)
    207 
    208  # Pre-assign values for non-ELF, or in case of failure for ELF.
    209  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
    210  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]
    211 
    212  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    213    try:
    214      fd, temp_file = tempfile.mkstemp()
    215      os.close(fd)
    216      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
    217      subprocess.check_output(cmd)
    218      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
    219      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
    220      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
    221        # This weird case has been observed for libwidevinecdm.so.
    222        logging.critical('Stripping made things worse for %s' % filename)
    223    except subprocess.CalledProcessError:
    224      logging.critical('Failed to strip file: %s' % filename)
    225    finally:
    226      os.unlink(temp_file)
    227  return sizes
    228 
    229 
    230 def _dump_chart_json(output_dir, chartjson):
    231  """Writes chart histogram to JSON files.
    232 
    233  Output files:
    234    results-chart.json contains the chart JSON.
    235    perf_results.json contains histogram JSON for Catapult.
    236 
    237  Args:
    238    output_dir: Directory to place the JSON files.
    239    chartjson: Source JSON data for output files.
    240  """
    241  results_path = os.path.join(output_dir, 'results-chart.json')
    242  logging.critical('Dumping chartjson to %s', results_path)
    243  with open(results_path, 'w') as json_file:
    244    json.dump(chartjson, json_file, indent=2)
    245 
    246  # We would ideally generate a histogram set directly instead of generating
    247  # chartjson then converting. However, perf_tests_results_helper is in
    248  # //build, which doesn't seem to have any precedent for depending on
    249  # anything in Catapult. This can probably be fixed, but since this doesn't
    250  # need to be super fast or anything, converting is a good enough solution
    251  # for the time being.
    252  histogram_result = convert_chart_json.ConvertChartJson(results_path)
    253  if histogram_result.returncode != 0:
    254    raise Exception('chartjson conversion failed with error: ' +
    255                    histogram_result.stdout)
    256 
    257  histogram_path = os.path.join(output_dir, 'perf_results.json')
    258  logging.critical('Dumping histograms to %s', histogram_path)
    259  with open(histogram_path, 'wb') as json_file:
    260    json_file.write(histogram_result.stdout)
    261 
    262 
    263 def _run_resource_sizes(args):
    264  """Main flow to extract and output size data."""
    265  chartjson = _BASE_CHART.copy()
    266  chartjson.update({
    267      'benchmark_description':
    268      ('LaCrOS %s resource size information.' % args.arch)
    269  })
    270  report_func = perf_tests_results_helper.ReportPerfResult
    271  total_sizes = collections.Counter()
    272 
    273  def report_sizes(sizes, title, track_stripped, track_compressed):
    274    report_func(chart_data=chartjson,
    275                graph_title=title,
    276                trace_title='size',
    277                value=sizes[_KEY_RAW],
    278                units='bytes')
    279 
    280    if track_stripped:
    281      report_func(chart_data=chartjson,
    282                  graph_title=title + ' (Stripped)',
    283                  trace_title='size',
    284                  value=sizes[_KEY_STRIPPED],
    285                  units='bytes')
    286 
    287    if track_compressed:
    288      report_func(chart_data=chartjson,
    289                  graph_title=title + ' (Gzipped)',
    290                  trace_title='size',
    291                  value=sizes[_KEY_GZIPPED],
    292                  units='bytes')
    293 
    294    if track_stripped and track_compressed:
    295      report_func(chart_data=chartjson,
    296                  graph_title=title + ' (Stripped, Gzipped)',
    297                  trace_title='size',
    298                  value=sizes[_KEY_STRIPPED_GZIPPED],
    299                  units='bytes')
    300 
    301  tracked_groups = _TRACKED_GROUPS.copy()
    302  # Architecture amd64 requires artifact nacl_irt_x86_64.nexe.
    303  if args.arch == 'amd64':
    304    tracked_groups.append(
    305        _Group(paths=['nacl_irt_x86_64.nexe'],
    306               title='File: nacl_irt_x86_64.nexe'))
    307  # Architecture arm32 requires artifact nacl_irt_arm.nexe.
    308  elif args.arch == 'arm32':
    309    tracked_groups.append(
    310        _Group(paths=['nacl_irt_arm.nexe'], title='File: nacl_irt_arm.nexe'))
    311    tracked_groups.append(
    312        _Group(paths=['nacl_helper_bootstrap'],
    313               title='File: nacl_helper_bootstrap'))
    314  # TODO(crbug.com/40236427): remove the following part once nacl files
    315  # are available.
    316  elif args.arch == 'arm64':
    317    tracked_groups.remove(
    318        _Group(paths=['nacl_helper'], title='File: nacl_helper'))
    319  for g in tracked_groups:
    320    sizes = sum(
    321        map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
    322        collections.Counter())
    323    report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)
    324 
    325    # Total compressed size is summed over individual compressed sizes, instead
    326    # of concatanating first, then compress everything. This is done for
    327    # simplicity. It also gives a conservative size estimate (assuming file
    328    # metadata and overheads are negligible).
    329    total_sizes += sizes
    330 
    331  report_sizes(total_sizes, 'Total', True, True)
    332 
    333  _dump_chart_json(args.output_dir, chartjson)
    334 
    335 
    336 def main():
    337  """Parses arguments and runs high level flows."""
    338  argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')
    339 
    340  argparser.add_argument('--chromium-output-directory',
    341                         dest='out_dir',
    342                         required=True,
    343                         type=os.path.realpath,
    344                         help='Location of the build artifacts.')
    345  argparser.add_argument('--arch',
    346                         required=True,
    347                         type=str,
    348                         help='The architecture of lacros, valid values: amd64,'
    349                         ' arm32, arm64')
    350 
    351  output_group = argparser.add_mutually_exclusive_group()
    352 
    353  output_group.add_argument('--output-dir',
    354                            default='.',
    355                            help='Directory to save chartjson to.')
    356 
    357  # Accepted to conform to the isolated script interface, but ignored.
    358  argparser.add_argument('--isolated-script-test-filter',
    359                         help=argparse.SUPPRESS)
    360  argparser.add_argument('--isolated-script-test-perf-output',
    361                         type=os.path.realpath,
    362                         help=argparse.SUPPRESS)
    363 
    364  output_group.add_argument(
    365      '--isolated-script-test-output',
    366      type=os.path.realpath,
    367      help='File to which results will be written in the simplified JSON '
    368      'output format.')
    369 
    370  args = argparser.parse_args()
    371 
    372  isolated_script_output = {'valid': False, 'failures': []}
    373  if args.isolated_script_test_output:
    374    test_name = 'lacros_resource_sizes'
    375    args.output_dir = os.path.join(
    376        os.path.dirname(args.isolated_script_test_output), test_name)
    377    if not os.path.exists(args.output_dir):
    378      os.makedirs(args.output_dir)
    379 
    380  try:
    381    _run_resource_sizes(args)
    382    isolated_script_output = {'valid': True, 'failures': []}
    383  finally:
    384    if args.isolated_script_test_output:
    385      results_path = os.path.join(args.output_dir, 'test_results.json')
    386      with open(results_path, 'w') as output_file:
    387        json.dump(isolated_script_output, output_file)
    388      with open(args.isolated_script_test_output, 'w') as output_file:
    389        json.dump(isolated_script_output, output_file)
    390  result_sink_client = result_sink.TryInitClient()
    391  if result_sink_client:
    392    status = result_types.PASS
    393    if not isolated_script_output['valid']:
    394      status = result_types.UNKNOWN
    395    elif isolated_script_output['failures']:
    396      status = result_types.FAIL
    397    result_sink_client.Post(test_name, status, None, None, None)
    398 
    399 
# Script entry point when run directly (not imported).
if __name__ == '__main__':
  main()