tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

resource_sizes.py (37862B)


      1 #!/usr/bin/env vpython3
      2 # Copyright 2011 The Chromium Authors
      3 # Use of this source code is governed by a BSD-style license that can be
      4 # found in the LICENSE file.
      5 
      6 """Reports binary size metrics for an APK.
      7 
      8 More information at //docs/speed/binary_size/metrics.md.
      9 """
     10 
     11 
     12 import argparse
     13 import collections
     14 from contextlib import contextmanager
     15 import json
     16 import logging
     17 import os
     18 import posixpath
     19 import re
     20 import struct
     21 import sys
     22 import tempfile
     23 import zipfile
     24 import zlib
     25 
     26 import devil_chromium
     27 from devil.android.sdk import build_tools
     28 from devil.utils import cmd_helper
     29 from devil.utils import lazy
     30 import method_count
     31 from pylib import constants
     32 from pylib.constants import host_paths
     33 
# Path to the aapt build tool, wrapped in lazy.WeakConstant; callers obtain the
# actual path string via _AAPT_PATH.read().
_AAPT_PATH = lazy.WeakConstant(lambda: build_tools.GetPath('aapt'))
# <src>/build/android/gyp. Pushed onto sys.path further down so that
# util.build_utils can be imported.
_ANDROID_UTILS_PATH = os.path.join(host_paths.DIR_SOURCE_ROOT, 'build',
                                   'android', 'gyp')
# llvm-readobj binary that _RunReadobj() invokes to dump ELF section headers.
_READOBJ_PATH = os.path.join(host_paths.DIR_SOURCE_ROOT, 'third_party',
                             'llvm-build', 'Release+Asserts', 'bin',
                             'llvm-readobj')
     40 
     41 with host_paths.SysPath(host_paths.BUILD_UTIL_PATH):
     42  from lib.common import perf_tests_results_helper
     43  from lib.results import result_sink
     44  from lib.results import result_types
     45 
     46 with host_paths.SysPath(host_paths.TRACING_PATH):
     47  from tracing.value import convert_chart_json  # pylint: disable=import-error
     48 
     49 with host_paths.SysPath(_ANDROID_UTILS_PATH, 0):
     50  from util import build_utils  # pylint: disable=import-error
     51 
# Captures an entire config from aapt output. The %s placeholder is filled in
# with the config (language) name by _CreateResourceIdValueMap().
_AAPT_CONFIG_PATTERN = r'config %s:(.*?)config [a-zA-Z-]+:'
# Matches string resource entries from aapt output. Named groups: |id| is the
# 10-character resource id token, |val| is the quoted string value.
_AAPT_ENTRY_RE = re.compile(
    r'resource (?P<id>\w{10}) [\w\.]+:string/.*?"(?P<val>.+?)"', re.DOTALL)
# Skeleton chartjson dict; presumably copied and populated by the reporting
# code later in this file (not referenced in this section).
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'benchmark_description': 'APK resource size information.',
    'trace_rerun_options': [],
    'charts': {}
}
# Macro definitions look like (something, 123) when
# enable_resource_allowlist_generation=true.
_RC_HEADER_RE = re.compile(r'^#define (?P<name>\w+).* (?P<id>\d+)\)?$')
# Matches pak files under assets/ whose names end in "resources.pak" or
# "percent.pak"; _AnalyzeInternal() buckets these as non-translated resources.
_RE_NON_LANGUAGE_PAK = re.compile(r'^assets/.*(resources|percent)\.pak$')
# Maps a reported metric group name to the ELF section names that are summed
# into it by _ExtractLibSectionSizesFromApk(). Sections not listed here are
# logged as unknown and added to the 'other' group.
_READELF_SIZES_METRICS = {
    'text': ['.text'],
    'data': ['.data', '.rodata', '.data.rel.ro', '.data.rel.ro.local'],
    'relocations':
    ['.rel.dyn', '.rel.plt', '.rela.dyn', '.rela.plt', '.relr.dyn'],
    'unwind': [
        '.ARM.extab', '.ARM.exidx', '.eh_frame', '.eh_frame_hdr',
        '.ARM.exidxsentinel_section_after_text'
    ],
    'symbols': [
        '.dynsym', '.dynstr', '.dynamic', '.shstrtab', '.got', '.plt', '.iplt',
        '.got.plt', '.hash', '.gnu.hash'
    ],
    'other': [
        '.init_array', '.preinit_array', '.ctors', '.fini_array', '.comment',
        '.note.gnu.gold-version', '.note.crashpad.info', '.note.android.ident',
        '.ARM.attributes', '.note.gnu.build-id', '.gnu.version',
        '.gnu.version_d', '.gnu.version_r', '.interp', '.gcc_except_table',
        '.note.gnu.property'
    ]
}
     89 
     90 
     91 class _AccumulatingReporter:
     92  def __init__(self):
     93    self._combined_metrics = collections.defaultdict(int)
     94 
     95  def __call__(self, graph_title, trace_title, value, units):
     96    self._combined_metrics[(graph_title, trace_title, units)] += value
     97 
     98  def DumpReports(self, report_func):
     99    for (graph_title, trace_title,
    100         units), value in sorted(self._combined_metrics.items()):
    101      report_func(graph_title, trace_title, value, units)
    102 
    103 
    104 class _ChartJsonReporter(_AccumulatingReporter):
    105  def __init__(self, chartjson):
    106    super().__init__()
    107    self._chartjson = chartjson
    108    self.trace_title_prefix = ''
    109 
    110  def __call__(self, graph_title, trace_title, value, units):
    111    super().__call__(graph_title, trace_title, value, units)
    112 
    113    perf_tests_results_helper.ReportPerfResult(
    114        self._chartjson, graph_title, self.trace_title_prefix + trace_title,
    115        value, units)
    116 
    117  def SynthesizeTotals(self, unique_method_count):
    118    for tup, value in sorted(self._combined_metrics.items()):
    119      graph_title, trace_title, units = tup
    120      if trace_title == 'unique methods':
    121        value = unique_method_count
    122      perf_tests_results_helper.ReportPerfResult(self._chartjson, graph_title,
    123                                                 'Combined_' + trace_title,
    124                                                 value, units)
    125 
    126 
    127 def _PercentageDifference(a, b):
    128  if a == 0:
    129    return 0
    130  return float(b - a) / a
    131 
    132 
    133 def _ReadZipInfoExtraFieldLength(zip_file, zip_info):
    134  """Reads the value of |extraLength| from |zip_info|'s local file header.
    135 
    136  |zip_info| has an |extra| field, but it's read from the central directory.
    137  Android's zipalign tool sets the extra field only in local file headers.
    138  """
    139  # Refer to https://en.wikipedia.org/wiki/Zip_(file_format)#File_headers
    140  zip_file.fp.seek(zip_info.header_offset + 28)
    141  return struct.unpack('<H', zip_file.fp.read(2))[0]
    142 
    143 
    144 def _MeasureApkSignatureBlock(zip_file):
    145  """Measures the size of the v2 / v3 signing block.
    146 
    147  Refer to: https://source.android.com/security/apksigning/v2
    148  """
    149  # Seek to "end of central directory" struct.
    150  eocd_offset_from_end = -22 - len(zip_file.comment)
    151  zip_file.fp.seek(eocd_offset_from_end, os.SEEK_END)
    152  assert zip_file.fp.read(4) == b'PK\005\006', (
    153      'failed to find end-of-central-directory')
    154 
    155  # Read out the "start of central directory" offset.
    156  zip_file.fp.seek(eocd_offset_from_end + 16, os.SEEK_END)
    157  start_of_central_directory = struct.unpack('<I', zip_file.fp.read(4))[0]
    158 
    159  # Compute the offset after the last zip entry.
    160  last_info = max(zip_file.infolist(), key=lambda i: i.header_offset)
    161  last_header_size = (30 + len(last_info.filename) +
    162                      _ReadZipInfoExtraFieldLength(zip_file, last_info))
    163  end_of_last_file = (last_info.header_offset + last_header_size +
    164                      last_info.compress_size)
    165  return start_of_central_directory - end_of_last_file
    166 
    167 
    168 def _RunReadobj(so_path, options):
    169  return cmd_helper.GetCmdOutput([_READOBJ_PATH, '--elf-output-style=GNU'] +
    170                                 options + [so_path])
    171 
    172 
    173 def _ExtractLibSectionSizesFromApk(apk_path, lib_path):
    174  with Unzip(apk_path, filename=lib_path) as extracted_lib_path:
    175    grouped_section_sizes = collections.defaultdict(int)
    176    no_bits_section_sizes, section_sizes = _CreateSectionNameSizeMap(
    177        extracted_lib_path)
    178    for group_name, section_names in _READELF_SIZES_METRICS.items():
    179      for section_name in section_names:
    180        if section_name in section_sizes:
    181          grouped_section_sizes[group_name] += section_sizes.pop(section_name)
    182 
    183    # Consider all NOBITS sections as .bss.
    184    grouped_section_sizes['bss'] = sum(no_bits_section_sizes.values())
    185 
    186    # Group any unknown section headers into the "other" group.
    187    for section_header, section_size in section_sizes.items():
    188      sys.stderr.write('Unknown elf section header: %s\n' % section_header)
    189      grouped_section_sizes['other'] += section_size
    190 
    191    return grouped_section_sizes
    192 
    193 
    194 def _CreateSectionNameSizeMap(so_path):
    195  stdout = _RunReadobj(so_path, ['-S', '--wide'])
    196  section_sizes = {}
    197  no_bits_section_sizes = {}
    198  # Matches  [ 2] .hash HASH 00000000006681f0 0001f0 003154 04   A  3   0  8
    199  for match in re.finditer(r'\[[\s\d]+\] (\..*)$', stdout, re.MULTILINE):
    200    items = match.group(1).split()
    201    target = no_bits_section_sizes if items[1] == 'NOBITS' else section_sizes
    202    target[items[0]] = int(items[4], 16)
    203 
    204  return no_bits_section_sizes, section_sizes
    205 
    206 
    207 def _ParseManifestAttributes(apk_path):
    208  # Check if the manifest specifies whether or not to extract native libs.
    209  output = cmd_helper.GetCmdOutput([
    210      _AAPT_PATH.read(), 'd', 'xmltree', apk_path, 'AndroidManifest.xml'])
    211 
    212  def parse_attr(namespace, name, default=None):
    213    # android:extractNativeLibs(0x010104ea)=(type 0x12)0x0
    214    # android:extractNativeLibs(0x010104ea)=(type 0x12)0xffffffff
    215    # dist:onDemand=(type 0x12)0xffffffff
    216    m = re.search(
    217        f'(?:{namespace}:)?{name}' + r'(?:\(.*?\))?=\(type .*?\)(\w+)', output)
    218    if m is None:
    219      return default
    220    return int(m.group(1), 16)
    221 
    222  skip_extract_lib = not parse_attr('android', 'extractNativeLibs', default=1)
    223  sdk_version = parse_attr('android', 'minSdkVersion')
    224  is_feature_split = parse_attr('android', 'isFeatureSplit')
    225  # Can use <dist:on-demand>, or <module dist:onDemand="true">.
    226  on_demand = parse_attr('dist', 'onDemand') or 'on-demand' in output
    227  on_demand = bool(on_demand and is_feature_split)
    228 
    229  return sdk_version, skip_extract_lib, on_demand
    230 
    231 
    232 def _NormalizeLanguagePaks(translations, factor):
    233  english_pak = translations.FindByPattern(r'.*/en[-_][Uu][Ss]\.l?pak')
    234  num_translations = translations.GetNumEntries()
    235  ret = 0
    236  if english_pak:
    237    ret -= translations.ComputeZippedSize()
    238    ret += int(english_pak.compress_size * num_translations * factor)
    239  return ret
    240 
    241 
    242 def _NormalizeResourcesArsc(apk_path, num_arsc_files, num_translations,
    243                            out_dir):
    244  """Estimates the expected overhead of untranslated strings in resources.arsc.
    245 
    246  See http://crbug.com/677966 for why this is necessary.
    247  """
    248  # If there are multiple .arsc files, use the resource packaged APK instead.
    249  if num_arsc_files > 1:
    250    if not out_dir:
    251      return -float('inf')
    252    ap_name = os.path.basename(apk_path).replace('.apk', '.ap_')
    253    ap_path = os.path.join(out_dir, 'arsc/apks', ap_name)
    254    if not os.path.exists(ap_path):
    255      raise Exception('Missing expected file: %s, try rebuilding.' % ap_path)
    256    apk_path = ap_path
    257 
    258  aapt_output = _RunAaptDumpResources(apk_path)
    259  # en-rUS is in the default config and may be cluttered with non-translatable
    260  # strings, so en-rGB is a better baseline for finding missing translations.
    261  en_strings = _CreateResourceIdValueMap(aapt_output, 'en-rGB')
    262  fr_strings = _CreateResourceIdValueMap(aapt_output, 'fr')
    263 
    264  # en-US and en-GB will never be translated.
    265  config_count = num_translations - 2
    266 
    267  size = 0
    268  for res_id, string_val in en_strings.items():
    269    if string_val == fr_strings[res_id]:
    270      string_size = len(string_val)
    271      # 7 bytes is the per-entry overhead (not specific to any string). See
    272      # https://android.googlesource.com/platform/frameworks/base.git/+/android-4.2.2_r1/tools/aapt/StringPool.cpp#414.
    273      # The 1.5 factor was determined experimentally and is meant to account for
    274      # other languages generally having longer strings than english.
    275      size += config_count * (7 + string_size * 1.5)
    276 
    277  return int(size)
    278 
    279 
    280 def _CreateResourceIdValueMap(aapt_output, lang):
    281  """Return a map of resource ids to string values for the given |lang|."""
    282  config_re = _AAPT_CONFIG_PATTERN % lang
    283  return {entry.group('id'): entry.group('val')
    284          for config_section in re.finditer(config_re, aapt_output, re.DOTALL)
    285          for entry in re.finditer(_AAPT_ENTRY_RE, config_section.group(0))}
    286 
    287 
    288 def _RunAaptDumpResources(apk_path):
    289  cmd = [_AAPT_PATH.read(), 'dump', '--values', 'resources', apk_path]
    290  status, output = cmd_helper.GetCmdStatusAndOutput(cmd)
    291  if status != 0:
    292    raise Exception('Failed running aapt command: "%s" with output "%s".' %
    293                    (' '.join(cmd), output))
    294  return output
    295 
    296 
    297 class _FileGroup:
    298  """Represents a category that apk files can fall into."""
    299 
    300  def __init__(self, name):
    301    self.name = name
    302    self._zip_infos = []
    303    self._extracted_multipliers = []
    304 
    305  def AddZipInfo(self, zip_info, extracted_multiplier=0):
    306    self._zip_infos.append(zip_info)
    307    self._extracted_multipliers.append(extracted_multiplier)
    308 
    309  def AllEntries(self):
    310    return iter(self._zip_infos)
    311 
    312  def GetNumEntries(self):
    313    return len(self._zip_infos)
    314 
    315  def FindByPattern(self, pattern):
    316    return next((i for i in self._zip_infos if re.match(pattern, i.filename)),
    317                None)
    318 
    319  def FindLargest(self):
    320    if not self._zip_infos:
    321      return None
    322    return max(self._zip_infos, key=lambda i: i.file_size)
    323 
    324  def ComputeZippedSize(self):
    325    return sum(i.compress_size for i in self._zip_infos)
    326 
    327  def ComputeUncompressedSize(self):
    328    return sum(i.file_size for i in self._zip_infos)
    329 
    330  def ComputeExtractedSize(self):
    331    ret = 0
    332    for zi, multiplier in zip(self._zip_infos, self._extracted_multipliers):
    333      ret += zi.file_size * multiplier
    334    return ret
    335 
    336  def ComputeInstallSize(self):
    337    return self.ComputeExtractedSize() + self.ComputeZippedSize()
    338 
    339 
def _AnalyzeInternal(apk_path,
                     sdk_version,
                     report_func,
                     dex_stats_collector,
                     out_dir,
                     apks_path=None,
                     split_name=None):
  """Analyse APK to determine size contributions of different file classes.

  Every zip entry is assigned to exactly one file group. Per-group sizes,
  install size estimates, transfer size, dex / native library specifics and a
  normalized APK size are then passed to |report_func|.

  Args:
    apk_path: Path to the .apk file to analyze.
    sdk_version: Integer API level; compared against 21, 24 and 28 to choose
      the dex extraction multipliers.
    report_func: Callable taking (graph_title, trace_title, value, units).
    dex_stats_collector: Object whose CollectFromZip(name, path) is invoked
      for |apk_path|.
    out_dir: Output directory, forwarded to _NormalizeResourcesArsc(). May be
      falsy.
    apks_path: Optional path to the .apks bundle that |apk_path| came from.
      When set, the size of the Hindi locale split is added to the APK size
      and locale normalization is skipped.
    split_name: Name of the split within |apks_path| (e.g. 'base').

  Returns: Normalized APK size. This is negative infinity when the APK has
    multiple .arsc files and no |out_dir| is available.

  Raises:
    AssertionError: If native code padding is outside the expected range, or
      if the Hindi split of the base module is missing from a non-shared
      bundle.
  """
  dex_stats_collector.CollectFromZip(split_name or '', apk_path)
  file_groups = []

  # Creates a group and registers it so the reporting loop below visits it.
  def make_group(name):
    group = _FileGroup(name)
    file_groups.append(group)
    return group

  def has_no_extension(filename):
    return os.path.splitext(filename)[1] == ''

  native_code = make_group('Native code')
  java_code = make_group('Java code')
  native_resources_no_translations = make_group('Native resources (no l10n)')
  translations = make_group('Native resources (l10n)')
  stored_translations = make_group('Native resources stored (l10n)')
  icu_data = make_group('ICU (i18n library) data')
  v8_snapshots = make_group('V8 Snapshots')
  png_drawables = make_group('PNG drawables')
  res_directory = make_group('Non-compiled Android resources')
  arsc = make_group('Compiled Android resources')
  metadata = make_group('Package metadata')
  notices = make_group('licenses.notice file')
  unwind_cfi = make_group('unwind_cfi (dev and canary only)')
  assets = make_group('Other Android Assets')
  unknown = make_group('Unknown files')

  with zipfile.ZipFile(apk_path, 'r') as apk:
    apk_contents = apk.infolist()
    # Account for zipalign overhead that exists in local file header.
    zipalign_overhead = sum(
        _ReadZipInfoExtraFieldLength(apk, i) for i in apk_contents)
    # Account for zipalign overhead that exists in central directory header.
    # Happens when python aligns entries in apkbuilder.py, but does not
    # exist when using Android's zipalign. E.g. for bundle .apks files.
    zipalign_overhead += sum(len(i.extra) for i in apk_contents)
    signing_block_size = _MeasureApkSignatureBlock(apk)

  # Only the extractNativeLibs-derived flag is needed from the manifest here;
  # the SDK version comes from the |sdk_version| argument.
  _, skip_extract_lib, _ = _ParseManifestAttributes(apk_path)

  # Pre-L: Dalvik - .odex file is simply decompressed/optimized dex file (~1x).
  # L, M: ART - .odex file is compiled version of the dex file (~4x).
  # N: ART - Uses Dalvik-like JIT for normal apps (~1x), full compilation for
  #    shared apps (~4x).
  # Actual multipliers calculated using "apk_operations.py disk-usage".
  # Will need to update multipliers once apk obfuscation is enabled.
  # E.g. with obfuscation, the 4.04 changes to 4.46.
  speed_profile_dex_multiplier = 1.17
  # APK flavor is inferred from the file name of the bundle (when given) or of
  # the APK itself.
  orig_filename = apks_path or apk_path
  is_webview = 'WebView' in orig_filename or 'Webview' in orig_filename
  is_monochrome = 'Monochrome' in orig_filename
  is_library = 'Library' in orig_filename
  is_trichrome = 'TrichromeChrome' in orig_filename
  # WebView is always a shared APK since other apps load it.
  # Library is always shared since it's used by chrome and webview
  # Chrome is always shared since renderers can't access dex otherwise
  # (see DexFixer).
  is_shared_apk = sdk_version >= 24 and (is_monochrome or is_webview
                                         or is_library or is_trichrome)
  # Dex decompression overhead varies by Android version.
  if sdk_version < 21:
    # JellyBean & KitKat
    dex_multiplier = 1.16
  elif sdk_version < 24:
    # Lollipop & Marshmallow
    dex_multiplier = 4.04
  elif is_shared_apk:
    # Oreo and above, compilation_filter=speed
    dex_multiplier = 4.04
  else:
    # Oreo and above, compilation_filter=speed-profile
    dex_multiplier = speed_profile_dex_multiplier

  total_apk_size = os.path.getsize(apk_path)
  # Classify every zip entry into exactly one group. The order of the checks
  # matters: the first matching branch wins.
  for member in apk_contents:
    filename = member.filename
    # Undo asset path suffixing. https://crbug.com/357131361
    if filename.endswith('+'):
      suffix_idx = filename.rfind('+', 0, len(filename) - 1)
      if suffix_idx != -1:
        filename = filename[:suffix_idx]

    # Directory entries carry no data.
    if filename.endswith('/'):
      continue
    if filename.endswith('.so'):
      basename = posixpath.basename(filename)
      # Only files named lib*.so are counted as extracted on install, and only
      # when the manifest does not disable native lib extraction.
      should_extract_lib = not skip_extract_lib and basename.startswith('lib')
      native_code.AddZipInfo(
          member, extracted_multiplier=int(should_extract_lib))
    elif filename.startswith('classes') and filename.endswith('.dex'):
      # Android P+, uncompressed dex does not need to be extracted.
      compressed = member.compress_type != zipfile.ZIP_STORED
      multiplier = dex_multiplier
      if not compressed and sdk_version >= 28:
        multiplier -= 1

      java_code.AddZipInfo(member, extracted_multiplier=multiplier)
    elif re.search(_RE_NON_LANGUAGE_PAK, filename):
      native_resources_no_translations.AddZipInfo(member)
    elif filename.endswith('.pak') or filename.endswith('.lpak'):
      compressed = member.compress_type != zipfile.ZIP_STORED
      bucket = translations if compressed else stored_translations
      extracted_multiplier = 0
      # Of the compressed locale paks, only English ones are counted as
      # extracted.
      if compressed:
        extracted_multiplier = int('en_' in filename or 'en-' in filename)
      bucket.AddZipInfo(member, extracted_multiplier=extracted_multiplier)
    elif 'icu' in filename and filename.endswith('.dat'):
      icu_data.AddZipInfo(member)
    elif filename.endswith('.bin'):
      v8_snapshots.AddZipInfo(member)
    elif filename.startswith('res/'):
      if (filename.endswith('.png') or filename.endswith('.webp')
          or has_no_extension(filename)):
        png_drawables.AddZipInfo(member)
      else:
        res_directory.AddZipInfo(member)
    elif filename.endswith('.arsc'):
      arsc.AddZipInfo(member)
    elif filename.startswith('META-INF') or filename in (
        'AndroidManifest.xml', 'assets/webapk_dex_version.txt',
        'stamp-cert-sha256'):
      metadata.AddZipInfo(member)
    elif filename.endswith('.notice'):
      notices.AddZipInfo(member)
    elif filename.startswith('assets/unwind_cfi'):
      unwind_cfi.AddZipInfo(member)
    elif filename.startswith('assets/'):
      assets.AddZipInfo(member)
    else:
      unknown.AddZipInfo(member)

  if apks_path:
    # We're mostly focused on size of Chrome for non-English locales, so assume
    # Hindi (arbitrarily chosen) locale split is installed.
    with zipfile.ZipFile(apks_path) as z:
      subpath = 'splits/{}-hi.apk'.format(split_name)
      if subpath in z.namelist():
        hindi_apk_info = z.getinfo(subpath)
        total_apk_size += hindi_apk_info.file_size
      elif not is_shared_apk:
        # In Chrome, splits should always be enabled.
        assert split_name != 'base', 'splits/base-hi.apk should always exist'

  total_install_size = total_apk_size
  total_install_size_android_go = total_apk_size
  # Starts at the full APK size; each group's zipped size is subtracted below,
  # leaving the bytes not attributable to any entry's data.
  zip_overhead = total_apk_size

  for group in file_groups:
    actual_size = group.ComputeZippedSize()
    install_size = group.ComputeInstallSize()
    uncompressed_size = group.ComputeUncompressedSize()
    extracted_size = group.ComputeExtractedSize()
    total_install_size += extracted_size
    zip_overhead -= actual_size

    report_func('Breakdown', group.name + ' size', actual_size, 'bytes')
    report_func('InstallBreakdown', group.name + ' size', int(install_size),
                'bytes')
    # Only a few metrics are compressed in the first place.
    # To avoid over-reporting, track uncompressed size only for compressed
    # entries.
    if uncompressed_size != actual_size:
      report_func('Uncompressed', group.name + ' size', uncompressed_size,
                  'bytes')

    if group is java_code:
      # Updates are compiled using quicken, but system image uses speed-profile.
      multiplier = speed_profile_dex_multiplier

      # Android P+, uncompressed dex does not need to be extracted.
      compressed = uncompressed_size != actual_size
      if not compressed and sdk_version >= 28:
        multiplier -= 1
      extracted_size = int(uncompressed_size * multiplier)
      total_install_size_android_go += extracted_size
      report_func('InstallBreakdownGo', group.name + ' size',
                  actual_size + extracted_size, 'bytes')
    elif group is translations and apks_path:
      # Assume Hindi rather than English (accounted for above in total_apk_size)
      total_install_size_android_go += actual_size
    else:
      total_install_size_android_go += extracted_size

  # Per-file zip overhead is caused by:
  # * 30 byte entry header + len(file name)
  # * 46 byte central directory entry + len(file name)
  # * 0-3 bytes for zipalign.
  report_func('Breakdown', 'Zip Overhead', zip_overhead, 'bytes')
  report_func('InstallSize', 'APK size', total_apk_size, 'bytes')
  report_func('InstallSize', 'Estimated installed size',
              int(total_install_size), 'bytes')
  report_func('InstallSize', 'Estimated installed size (Android Go)',
              int(total_install_size_android_go), 'bytes')
  transfer_size = _CalculateCompressedSize(apk_path)
  report_func('TransferSize', 'Transfer size (deflate)', transfer_size, 'bytes')

  # Size of main dex vs remaining.
  main_dex_info = java_code.FindByPattern('classes.dex')
  if main_dex_info:
    main_dex_size = main_dex_info.file_size
    report_func('Specifics', 'main dex size', main_dex_size, 'bytes')
    secondary_size = java_code.ComputeUncompressedSize() - main_dex_size
    report_func('Specifics', 'secondary dex size', secondary_size, 'bytes')

  # The largest native library is treated as the "main" one.
  main_lib_info = native_code.FindLargest()
  native_code_unaligned_size = 0
  for lib_info in native_code.AllEntries():
    # Skip placeholders.
    if lib_info.file_size == 0:
      continue
    section_sizes = _ExtractLibSectionSizesFromApk(apk_path, lib_info.filename)
    # 'bss' is the sum of NOBITS section sizes, so it is left out of the
    # unaligned total.
    native_code_unaligned_size += sum(v for k, v in section_sizes.items()
                                      if k != 'bss')
    # Size of main .so vs remaining.
    if lib_info == main_lib_info:
      main_lib_size = lib_info.file_size
      report_func('Specifics', 'main lib size', main_lib_size, 'bytes')
      secondary_size = native_code.ComputeUncompressedSize() - main_lib_size
      report_func('Specifics', 'other lib size', secondary_size, 'bytes')

      for metric_name, size in section_sizes.items():
        report_func('MainLibInfo', metric_name, size, 'bytes')

  # Main metric that we want to monitor for jumps.
  normalized_apk_size = total_apk_size
  # unwind_cfi exists only in dev, canary, and non-channel builds.
  normalized_apk_size -= unwind_cfi.ComputeZippedSize()
  # Sections within .so files get 4kb aligned, so use section sizes rather than
  # file size. Also gets rid of compression.
  normalized_apk_size -= native_code.ComputeZippedSize()
  normalized_apk_size += native_code_unaligned_size
  # Normalized dex size: Size within the zip + size on disk for Android Go
  # devices running Android O (which ~= uncompressed dex size).
  # Use a constant compression factor to account for fluctuations.
  normalized_apk_size -= java_code.ComputeZippedSize()
  normalized_apk_size += java_code.ComputeUncompressedSize()
  # Don't include zipalign overhead in normalized size, since it effectively
  # causes size changes of files that precede aligned files to be rounded.
  # For APKs where classes.dex directly precedes libchrome.so (the normal case),
  # this causes small dex size changes to disappear into libchrome.so alignment.
  normalized_apk_size -= zipalign_overhead
  # Don't include the size of the apk's signing block because it can fluctuate
  # by up to 4kb (from my non-scientific observations), presumably based on hash
  # sizes.
  normalized_apk_size -= signing_block_size

  # Unaligned size should be ~= uncompressed size or something is wrong.
  # As of now, padding_fraction ~= .007
  padding_fraction = -_PercentageDifference(
      native_code.ComputeUncompressedSize(), native_code_unaligned_size)
  # Ignore this check for small / no native code
  if native_code.ComputeUncompressedSize() > 1000000:
    assert 0 <= padding_fraction < .02, (
        'Padding was: {} (file_size={}, sections_sum={})'.format(
            padding_fraction, native_code.ComputeUncompressedSize(),
            native_code_unaligned_size))

  if apks_path:
    # Locale normalization not needed when measuring only one locale.
    # E.g. a change that adds 300 chars of untranslated strings would cause the
    # metric to be off by only 390 bytes (assuming a multiplier of 2.3 for
    # Hindi).
    pass
  else:
    # Avoid noise caused when strings change and translations haven't yet been
    # updated.
    num_translations = translations.GetNumEntries()
    num_stored_translations = stored_translations.GetNumEntries()

    if num_translations > 1:
      # Multipliers found by looking at MonochromePublic.apk and seeing how much
      # smaller en-US.pak is relative to the average locale.pak.
      normalized_apk_size += _NormalizeLanguagePaks(translations, 1.17)
    if num_stored_translations > 1:
      normalized_apk_size += _NormalizeLanguagePaks(stored_translations, 1.43)
    if num_translations + num_stored_translations > 1:
      if num_translations == 0:
        # WebView stores all locale paks uncompressed.
        num_arsc_translations = num_stored_translations
      else:
        # Monochrome has more configurations than Chrome since it includes
        # WebView (which supports more locales), but these should mostly be
        # empty so ignore them here.
        num_arsc_translations = num_translations
      normalized_apk_size += _NormalizeResourcesArsc(apk_path,
                                                     arsc.GetNumEntries(),
                                                     num_arsc_translations,
                                                     out_dir)

  # It will be -Inf for .apk files with multiple .arsc files and no out_dir set.
  if normalized_apk_size < 0:
    sys.stderr.write('Skipping normalized_apk_size (no output directory set)\n')
  else:
    report_func('Specifics', 'normalized apk size', normalized_apk_size,
                'bytes')
  # The "file count" metric cannot be grouped with any other metrics when the
  # end result is going to be uploaded to the perf dashboard in the HistogramSet
  # format due to mixed units (bytes vs. zip entries) causing malformed
  # summaries to be generated.
  # TODO(crbug.com/41425646): Remove this workaround if unit mixing is
  # ever supported.
  report_func('FileCount', 'file count', len(apk_contents), 'zip entries')

  for info in unknown.AllEntries():
    sys.stderr.write(
        'Unknown entry: %s %d\n' % (info.filename, info.compress_size))
  return normalized_apk_size
    658 
    659 
    660 def _CalculateCompressedSize(file_path):
    661  CHUNK_SIZE = 256 * 1024
    662  compressor = zlib.compressobj()
    663  total_size = 0
    664  with open(file_path, 'rb') as f:
    665    for chunk in iter(lambda: f.read(CHUNK_SIZE), b''):
    666      total_size += len(compressor.compress(chunk))
    667  total_size += len(compressor.flush())
    668  return total_size
    669 
    670 
    671 @contextmanager
    672 def Unzip(zip_file, filename=None):
    673  """Utility for temporary use of a single file in a zip archive."""
    674  with build_utils.TempDir() as unzipped_dir:
    675    unzipped_files = build_utils.ExtractAll(
    676        zip_file, unzipped_dir, True, pattern=filename)
    677    if len(unzipped_files) == 0:
    678      raise Exception(
    679          '%s not found in %s' % (filename, zip_file))
    680    yield unzipped_files[0]
    681 
    682 
    683 def _ConfigOutDir(out_dir):
    684  if out_dir:
    685    constants.SetOutputDirectory(out_dir)
    686  else:
    687    try:
    688      # Triggers auto-detection when CWD == output directory.
    689      constants.CheckOutputDirectory()
    690      out_dir = constants.GetOutDirectory()
    691    except Exception:  # pylint: disable=broad-except
    692      pass
    693  return out_dir
    694 
    695 
    696 def _IterSplits(namelist):
    697  for subpath in namelist:
    698    # Looks for paths like splits/vr-master.apk, splits/vr-hi.apk.
    699    name_parts = subpath.split('/')
    700    if name_parts[0] == 'splits' and len(name_parts) == 2:
    701      name_parts = name_parts[1].split('-')
    702      if len(name_parts) == 2:
    703        split_name, config_name = name_parts
    704        if config_name == 'master.apk':
    705          yield subpath, split_name
    706 
    707 
    708 def _ExtractToTempFile(zip_obj, subpath, temp_file):
    709  temp_file.seek(0)
    710  temp_file.truncate()
    711  temp_file.write(zip_obj.read(subpath))
    712  temp_file.flush()
    713 
    714 
    715 def _AnalyzeApkOrApks(report_func, apk_path, out_dir):
    716  # Create DexStatsCollector here to track unique methods across base & chrome
    717  # modules.
    718  dex_stats_collector = method_count.DexStatsCollector()
    719 
    720  if apk_path.endswith('.apk'):
    721    sdk_version, _, _ = _ParseManifestAttributes(apk_path)
    722    _AnalyzeInternal(apk_path, sdk_version, report_func, dex_stats_collector,
    723                     out_dir)
    724  elif apk_path.endswith('.apks'):
    725    with tempfile.NamedTemporaryFile(suffix='.apk') as f:
    726      with zipfile.ZipFile(apk_path) as z:
    727        # Currently bundletool is creating two apks when .apks is created
    728        # without specifying an sdkVersion. Always measure the one with an
    729        # uncompressed shared library.
    730        try:
    731          info = z.getinfo('splits/base-master_2.apk')
    732        except KeyError:
    733          info = z.getinfo('splits/base-master.apk')
    734        _ExtractToTempFile(z, info.filename, f)
    735        sdk_version, _, _ = _ParseManifestAttributes(f.name)
    736 
    737        orig_report_func = report_func
    738        report_func = _AccumulatingReporter()
    739 
    740        def do_measure(split_name, on_demand):
    741          logging.info('Measuring %s on_demand=%s', split_name, on_demand)
    742          # Use no-op reporting functions to get normalized size for DFMs.
    743          inner_report_func = report_func
    744          inner_dex_stats_collector = dex_stats_collector
    745          if on_demand:
    746            inner_report_func = lambda *_: None
    747            inner_dex_stats_collector = method_count.DexStatsCollector()
    748 
    749          size = _AnalyzeInternal(f.name,
    750                                  sdk_version,
    751                                  inner_report_func,
    752                                  inner_dex_stats_collector,
    753                                  out_dir,
    754                                  apks_path=apk_path,
    755                                  split_name=split_name)
    756          report_func('DFM_' + split_name, 'Size with hindi', size, 'bytes')
    757 
    758        # Measure base outside of the loop since we've already extracted it.
    759        do_measure('base', on_demand=False)
    760 
    761        for subpath, split_name in _IterSplits(z.namelist()):
    762          if split_name != 'base':
    763            _ExtractToTempFile(z, subpath, f)
    764            _, _, on_demand = _ParseManifestAttributes(f.name)
    765            do_measure(split_name, on_demand=on_demand)
    766 
    767        report_func.DumpReports(orig_report_func)
    768        report_func = orig_report_func
    769  else:
    770    raise Exception('Unknown file type: ' + apk_path)
    771 
    772  # Report dex stats outside of _AnalyzeInternal() so that the "unique methods"
    773  # metric is not just the sum of the base and chrome modules.
    774  for metric, count in dex_stats_collector.GetTotalCounts().items():
    775    report_func('Dex', metric, count, 'entries')
    776  report_func('Dex', 'unique methods',
    777              dex_stats_collector.GetUniqueMethodCount(), 'entries')
    778  report_func('DexCache', 'DexCache',
    779              dex_stats_collector.GetDexCacheSize(pre_oreo=sdk_version < 26),
    780              'bytes')
    781 
    782  return dex_stats_collector
    783 
    784 
    785 def _ResourceSizes(args):
    786  chartjson = _BASE_CHART.copy() if args.output_format else None
    787  reporter = _ChartJsonReporter(chartjson)
    788  # Create DexStatsCollector here to track unique methods across trichrome APKs.
    789  dex_stats_collector = method_count.DexStatsCollector()
    790 
    791  specs = [
    792      ('Chrome_', args.trichrome_chrome),
    793      ('WebView_', args.trichrome_webview),
    794      ('Library_', args.trichrome_library),
    795  ]
    796  for prefix, path in specs:
    797    if path:
    798      reporter.trace_title_prefix = prefix
    799      child_dex_stats_collector = _AnalyzeApkOrApks(reporter, path,
    800                                                    args.out_dir)
    801      dex_stats_collector.MergeFrom(prefix, child_dex_stats_collector)
    802 
    803  if any(path for _, path in specs):
    804    reporter.SynthesizeTotals(dex_stats_collector.GetUniqueMethodCount())
    805  else:
    806    _AnalyzeApkOrApks(reporter, args.input, args.out_dir)
    807 
    808  if chartjson:
    809    _DumpChartJson(args, chartjson)
    810 
    811 
    812 def _DumpChartJson(args, chartjson):
    813  if args.output_file == '-':
    814    json_file = sys.stdout
    815  elif args.output_file:
    816    json_file = open(args.output_file, 'w')
    817  else:
    818    results_path = os.path.join(args.output_dir, 'results-chart.json')
    819    logging.critical('Dumping chartjson to %s', results_path)
    820    json_file = open(results_path, 'w')
    821 
    822  json.dump(chartjson, json_file, indent=2)
    823 
    824  if json_file is not sys.stdout:
    825    json_file.close()
    826 
    827  # We would ideally generate a histogram set directly instead of generating
    828  # chartjson then converting. However, perf_tests_results_helper is in
    829  # //build, which doesn't seem to have any precedent for depending on
    830  # anything in Catapult. This can probably be fixed, but since this doesn't
    831  # need to be super fast or anything, converting is a good enough solution
    832  # for the time being.
    833  if args.output_format == 'histograms':
    834    histogram_result = convert_chart_json.ConvertChartJson(results_path)
    835    if histogram_result.returncode != 0:
    836      raise Exception('chartjson conversion failed with error: ' +
    837                      histogram_result.stdout)
    838 
    839    histogram_path = os.path.join(args.output_dir, 'perf_results.json')
    840    logging.critical('Dumping histograms to %s', histogram_path)
    841    with open(histogram_path, 'wb') as json_file:
    842      json_file.write(histogram_result.stdout)
    843 
    844 
    845 def main():
    846  build_utils.InitLogging('RESOURCE_SIZES_DEBUG')
    847  argparser = argparse.ArgumentParser(description='Print APK size metrics.')
    848  argparser.add_argument(
    849      '--min-pak-resource-size',
    850      type=int,
    851      default=20 * 1024,
    852      help='Minimum byte size of displayed pak resources.')
    853  argparser.add_argument(
    854      '--chromium-output-directory',
    855      dest='out_dir',
    856      type=os.path.realpath,
    857      help='Location of the build artifacts.')
    858  argparser.add_argument(
    859      '--chartjson',
    860      action='store_true',
    861      help='DEPRECATED. Use --output-format=chartjson '
    862      'instead.')
    863  argparser.add_argument(
    864      '--output-format',
    865      choices=['chartjson', 'histograms'],
    866      help='Output the results to a file in the given '
    867      'format instead of printing the results.')
    868  argparser.add_argument('--loadable_module', help='Obsolete (ignored).')
    869 
    870  # Accepted to conform to the isolated script interface, but ignored.
    871  argparser.add_argument(
    872      '--isolated-script-test-filter', help=argparse.SUPPRESS)
    873  argparser.add_argument(
    874      '--isolated-script-test-perf-output',
    875      type=os.path.realpath,
    876      help=argparse.SUPPRESS)
    877  argparser.add_argument('--isolated-script-test-repeat',
    878                         help=argparse.SUPPRESS)
    879  argparser.add_argument('--isolated-script-test-launcher-retry-limit',
    880                         help=argparse.SUPPRESS)
    881  output_group = argparser.add_mutually_exclusive_group()
    882 
    883  output_group.add_argument(
    884      '--output-dir', default='.', help='Directory to save chartjson to.')
    885  output_group.add_argument(
    886      '--output-file',
    887      help='Path to output .json (replaces --output-dir). Works only for '
    888      '--output-format=chartjson')
    889  output_group.add_argument(
    890      '--isolated-script-test-output',
    891      type=os.path.realpath,
    892      help='File to which results will be written in the '
    893      'simplified JSON output format.')
    894 
    895  argparser.add_argument('input', help='Path to .apk or .apks file to measure.')
    896  trichrome_group = argparser.add_argument_group(
    897      'Trichrome inputs',
    898      description='When specified, |input| is used only as Test suite name.')
    899  trichrome_group.add_argument(
    900      '--trichrome-chrome', help='Path to Trichrome Chrome .apks')
    901  trichrome_group.add_argument(
    902      '--trichrome-webview', help='Path to Trichrome WebView .apk(s)')
    903  trichrome_group.add_argument(
    904      '--trichrome-library', help='Path to Trichrome Library .apk')
    905  args = argparser.parse_args()
    906 
    907  args.out_dir = _ConfigOutDir(args.out_dir)
    908  devil_chromium.Initialize(output_directory=args.out_dir)
    909 
    910  # TODO(bsheedy): Remove this once uses of --chartjson have been removed.
    911  if args.chartjson:
    912    args.output_format = 'chartjson'
    913 
    914  result_sink_client = result_sink.TryInitClient()
    915  isolated_script_output = {'valid': False, 'failures': []}
    916 
    917  test_name = 'resource_sizes (%s)' % os.path.basename(args.input)
    918 
    919  if args.isolated_script_test_output:
    920    args.output_dir = os.path.join(
    921        os.path.dirname(args.isolated_script_test_output), test_name)
    922    if not os.path.exists(args.output_dir):
    923      os.makedirs(args.output_dir)
    924 
    925  try:
    926    _ResourceSizes(args)
    927    isolated_script_output = {
    928        'valid': True,
    929        'failures': [],
    930    }
    931  finally:
    932    if args.isolated_script_test_output:
    933      results_path = os.path.join(args.output_dir, 'test_results.json')
    934      with open(results_path, 'w') as output_file:
    935        json.dump(isolated_script_output, output_file)
    936      with open(args.isolated_script_test_output, 'w') as output_file:
    937        json.dump(isolated_script_output, output_file)
    938    if result_sink_client:
    939      status = result_types.PASS
    940      if not isolated_script_output['valid']:
    941        status = result_types.UNKNOWN
    942      elif isolated_script_output['failures']:
    943        status = result_types.FAIL
    944      result_sink_client.Post(test_name, status, None, None, None)
    945 
    946 
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
  main()