lacros_resource_sizes.py (14444B)
#!/usr/bin/env python3
# Copyright 2020 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.
"""Reports binary size metrics for LaCrOS build artifacts.

More information at //docs/speed/binary_size/metrics.md.
"""

import argparse
import collections
import contextlib
import json
import logging
import os
import subprocess
import sys
import tempfile

SRC_DIR = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
sys.path.insert(0, os.path.join(SRC_DIR, 'build', 'util'))
from lib.results import result_sink
from lib.results import result_types


@contextlib.contextmanager
def _SysPath(path):
  """Library import context that temporarily appends |path| to |sys.path|."""
  if path and path not in sys.path:
    sys.path.insert(0, path)
  else:
    path = None  # Indicates that |sys.path| is not modified.
  try:
    yield
  finally:
    if path:
      sys.path.pop(0)


DIR_SOURCE_ROOT = os.environ.get(
    'CHECKOUT_SOURCE_ROOT',
    os.path.abspath(
        os.path.join(os.path.dirname(__file__), os.pardir, os.pardir)))

BUILD_UTIL_PATH = os.path.join(DIR_SOURCE_ROOT, 'build', 'util')

TRACING_PATH = os.path.join(DIR_SOURCE_ROOT, 'third_party', 'catapult',
                            'tracing')

EU_STRIP_PATH = os.path.join(DIR_SOURCE_ROOT, 'buildtools', 'third_party',
                             'eu-strip', 'bin', 'eu-strip')

with _SysPath(BUILD_UTIL_PATH):
  from lib.common import perf_tests_results_helper

with _SysPath(TRACING_PATH):
  from tracing.value import convert_chart_json  # pylint: disable=import-error

# Skeleton of the chartjson document; a fresh copy is populated per run.
_BASE_CHART = {
    'format_version': '0.1',
    'benchmark_name': 'resource_sizes',
    'trace_rerun_options': [],
    'charts': {}
}

# Keys of the per-file size measurements stored in a collections.Counter.
_KEY_RAW = 'raw'
_KEY_GZIPPED = 'gzipped'
_KEY_STRIPPED = 'stripped'
_KEY_STRIPPED_GZIPPED = 'stripped_then_gzipped'


class _Group:
  """A group of build artifacts whose file sizes are summed and tracked.

  Build artifacts for size tracking fall under these categories:
  * File: A single file.
  * Group: A collection of files.
  * Dir: All files under a directory.

  Attributes:
    paths: A list of files or directories to be tracked together.
    title: The display name of the group.
    track_stripped: Whether to also track summed stripped ELF sizes.
    track_compressed: Whether to also track summed compressed sizes.
  """

  def __init__(self, paths, title, track_stripped=False,
               track_compressed=False):
    self.paths = paths
    self.title = title
    self.track_stripped = track_stripped
    self.track_compressed = track_compressed

  def __eq__(self, other):
    """Overrides the default implementation."""
    if isinstance(other, _Group):
      # Logical |and| (not bitwise |&|): same result for bool operands, but
      # short-circuits and states the intent.
      return (self.paths == other.paths and self.title == other.title
              and self.track_stripped == other.track_stripped
              and self.track_compressed == other.track_compressed)
    return False


# Common artifacts in official builder lacros-arm32 and lacros64 in
# src-internal. The artifacts can be found in
# chromium/src-internal/testing/buildbot/archive/lacros64.json and
# chromium/src-internal/testing/buildbot/archive/lacros-arm32.json
# chromium/src-internal/testing/buildbot/archive/lacros-arm64.json
_TRACKED_GROUPS = [
    _Group(paths=['chrome'],
           title='File: chrome',
           track_stripped=True,
           track_compressed=True),
    _Group(paths=['chrome_crashpad_handler'],
           title='File: chrome_crashpad_handler'),
    _Group(paths=['icudtl.dat'], title='File: icudtl.dat'),
    _Group(paths=['icudtl.dat.hash'], title='File: icudtl.dat.hash'),
    _Group(paths=['libEGL.so'], title='File: libEGL.so'),
    _Group(paths=['libGLESv2.so'], title='File: libGLESv2.so'),
    _Group(paths=['nacl_helper'], title='File: nacl_helper'),
    _Group(paths=['resources.pak'], title='File: resources.pak'),
    _Group(paths=[
        'chrome_100_percent.pak', 'chrome_200_percent.pak',
        'headless_lib_data.pak', 'headless_lib_strings.pak'
    ],
           title='Group: Other PAKs'),
    _Group(paths=['snapshot_blob.bin'], title='Group: Misc'),
    _Group(paths=['locales/'], title='Dir: locales'),
    _Group(paths=['PrivacySandboxAttestationsPreloaded/'],
           title='Dir: PrivacySandboxAttestationsPreloaded'),
    _Group(paths=['resources/accessibility/'],
           title='Dir: resources/accessibility'),
    _Group(paths=['WidevineCdm/'], title='Dir: WidevineCdm'),
]


def _visit_paths(base_dir, paths):
  """Itemizes files specified by a list of paths.

  Args:
    base_dir: Base directory for all elements in |paths|.
    paths: A list of filenames or directory names to specify files whose sizes
      to be counted. Directories are recursed. There's no de-duping effort.
      Non-existing files or directories are ignored (with warning message).

  Yields:
    Full path of each file found.
  """
  for path in paths:
    full_path = os.path.join(base_dir, path)
    if os.path.exists(full_path):
      if os.path.isdir(full_path):
        for dirpath, _, filenames in os.walk(full_path):
          for filename in filenames:
            yield os.path.join(dirpath, filename)
      else:  # Assume is file.
        yield full_path
    else:
      logging.critical('Not found: %s', path)


def _is_probably_elf(filename):
  """Heuristically decides whether |filename| is ELF via magic signature."""
  with open(filename, 'rb') as fh:
    # The file is opened in binary mode, so read() returns bytes; the magic
    # must be a bytes literal (comparing to a str is always False in
    # Python 3, which silently disabled strip-size tracking).
    return fh.read(4) == b'\x7fELF'


def _is_unstrippable_elf(filename):
  """Identifies known-unstrippable ELF files to denoise the system."""
  return filename.endswith('.nexe') or filename.endswith('libwidevinecdm.so')


def _get_filesize(filename):
  """Returns the size of a file, or 0 if file is not found."""
  try:
    return os.path.getsize(filename)
  except OSError:
    logging.critical('Failed to get size: %s', filename)
  return 0


def _get_gzipped_filesize(filename):
  """Returns the gzipped size of a file, or 0 if file is not found."""
  BUFFER_SIZE = 65536
  if not os.path.isfile(filename):
    return 0
  try:
    # Call gzip externally instead of using gzip package since it's > 2x faster.
    cmd = ['gzip', '-c', filename]
    p = subprocess.Popen(cmd, stdout=subprocess.PIPE)
    # Manually counting bytes instead of using len(p.communicate()[0]) to avoid
    # buffering the entire compressed data (can be ~100 MB).
    ret = 0
    while True:
      chunk = len(p.stdout.read(BUFFER_SIZE))
      if chunk == 0:
        break
      ret += chunk
    # Reap the child so it does not linger as a zombie process.
    p.stdout.close()
    p.wait()
    return ret
  except OSError:
    logging.critical('Failed to get gzipped size: %s', filename)
  return 0


def _get_catagorized_filesizes(filename):
  """Measures |filename| sizes under various transforms.

  Returns: A Counter (keyed by _KEY_* constants) that stores measured sizes.
  """
  sizes = collections.Counter()
  sizes[_KEY_RAW] = _get_filesize(filename)
  sizes[_KEY_GZIPPED] = _get_gzipped_filesize(filename)

  # Pre-assign values for non-ELF, or in case of failure for ELF.
  sizes[_KEY_STRIPPED] = sizes[_KEY_RAW]
  sizes[_KEY_STRIPPED_GZIPPED] = sizes[_KEY_GZIPPED]

  if _is_probably_elf(filename) and not _is_unstrippable_elf(filename):
    # Create the temp file before entering the try block so that |temp_file|
    # is always bound when the finally clause unlinks it. (With mkstemp()
    # inside the try, a failure there raised NameError in the finally.)
    fd, temp_file = tempfile.mkstemp()
    os.close(fd)
    try:
      cmd = [EU_STRIP_PATH, filename, '-o', temp_file]
      subprocess.check_output(cmd)
      sizes[_KEY_STRIPPED] = _get_filesize(temp_file)
      sizes[_KEY_STRIPPED_GZIPPED] = _get_gzipped_filesize(temp_file)
      if sizes[_KEY_STRIPPED] > sizes[_KEY_RAW]:
        # This weird case has been observed for libwidevinecdm.so.
        logging.critical('Stripping made things worse for %s', filename)
    except subprocess.CalledProcessError:
      logging.critical('Failed to strip file: %s', filename)
    finally:
      os.unlink(temp_file)
  return sizes


def _dump_chart_json(output_dir, chartjson):
  """Writes chart histogram to JSON files.

  Output files:
    results-chart.json contains the chart JSON.
    perf_results.json contains histogram JSON for Catapult.

  Args:
    output_dir: Directory to place the JSON files.
    chartjson: Source JSON data for output files.
  """
  results_path = os.path.join(output_dir, 'results-chart.json')
  logging.critical('Dumping chartjson to %s', results_path)
  with open(results_path, 'w') as json_file:
    json.dump(chartjson, json_file, indent=2)

  # We would ideally generate a histogram set directly instead of generating
  # chartjson then converting. However, perf_tests_results_helper is in
  # //build, which doesn't seem to have any precedent for depending on
  # anything in Catapult. This can probably be fixed, but since this doesn't
  # need to be super fast or anything, converting is a good enough solution
  # for the time being.
  histogram_result = convert_chart_json.ConvertChartJson(results_path)
  if histogram_result.returncode != 0:
    raise Exception('chartjson conversion failed with error: ' +
                    histogram_result.stdout)

  histogram_path = os.path.join(output_dir, 'perf_results.json')
  logging.critical('Dumping histograms to %s', histogram_path)
  with open(histogram_path, 'wb') as json_file:
    json_file.write(histogram_result.stdout)


def _run_resource_sizes(args):
  """Main flow to extract and output size data."""
  # NOTE: .copy() is shallow, so the nested 'charts' dict is shared with
  # _BASE_CHART; this is fine for a single run, which populates it once.
  chartjson = _BASE_CHART.copy()
  chartjson.update({
      'benchmark_description':
      ('LaCrOS %s resource size information.' % args.arch)
  })
  report_func = perf_tests_results_helper.ReportPerfResult
  total_sizes = collections.Counter()

  def report_sizes(sizes, title, track_stripped, track_compressed):
    # Emits one chart per requested transform of the summed sizes.
    report_func(chart_data=chartjson,
                graph_title=title,
                trace_title='size',
                value=sizes[_KEY_RAW],
                units='bytes')

    if track_stripped:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED],
                  units='bytes')

    if track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_GZIPPED],
                  units='bytes')

    if track_stripped and track_compressed:
      report_func(chart_data=chartjson,
                  graph_title=title + ' (Stripped, Gzipped)',
                  trace_title='size',
                  value=sizes[_KEY_STRIPPED_GZIPPED],
                  units='bytes')

  tracked_groups = _TRACKED_GROUPS.copy()
  # Architecture amd64 requires artifact nacl_irt_x86_64.nexe.
  if args.arch == 'amd64':
    tracked_groups.append(
        _Group(paths=['nacl_irt_x86_64.nexe'],
               title='File: nacl_irt_x86_64.nexe'))
  # Architecture arm32 requires artifact nacl_irt_arm.nexe.
  elif args.arch == 'arm32':
    tracked_groups.append(
        _Group(paths=['nacl_irt_arm.nexe'], title='File: nacl_irt_arm.nexe'))
    tracked_groups.append(
        _Group(paths=['nacl_helper_bootstrap'],
               title='File: nacl_helper_bootstrap'))
  # TODO(crbug.com/40236427): remove the following part once nacl files
  # are available.
  elif args.arch == 'arm64':
    # Relies on _Group.__eq__ to find the matching element.
    tracked_groups.remove(
        _Group(paths=['nacl_helper'], title='File: nacl_helper'))
  for g in tracked_groups:
    sizes = sum(
        map(_get_catagorized_filesizes, _visit_paths(args.out_dir, g.paths)),
        collections.Counter())
    report_sizes(sizes, g.title, g.track_stripped, g.track_compressed)

    # Total compressed size is summed over individual compressed sizes, instead
    # of concatenating first, then compress everything. This is done for
    # simplicity. It also gives a conservative size estimate (assuming file
    # metadata and overheads are negligible).
    total_sizes += sizes

  report_sizes(total_sizes, 'Total', True, True)

  _dump_chart_json(args.output_dir, chartjson)


def main():
  """Parses arguments and runs high level flows."""
  argparser = argparse.ArgumentParser(description='Writes LaCrOS size metrics.')

  argparser.add_argument('--chromium-output-directory',
                         dest='out_dir',
                         required=True,
                         type=os.path.realpath,
                         help='Location of the build artifacts.')
  argparser.add_argument('--arch',
                         required=True,
                         type=str,
                         help='The architecture of lacros, valid values: amd64,'
                         ' arm32, arm64')

  output_group = argparser.add_mutually_exclusive_group()

  output_group.add_argument('--output-dir',
                            default='.',
                            help='Directory to save chartjson to.')

  # Accepted to conform to the isolated script interface, but ignored.
  argparser.add_argument('--isolated-script-test-filter',
                         help=argparse.SUPPRESS)
  argparser.add_argument('--isolated-script-test-perf-output',
                         type=os.path.realpath,
                         help=argparse.SUPPRESS)

  output_group.add_argument(
      '--isolated-script-test-output',
      type=os.path.realpath,
      help='File to which results will be written in the simplified JSON '
      'output format.')

  args = argparser.parse_args()

  isolated_script_output = {'valid': False, 'failures': []}
  if args.isolated_script_test_output:
    test_name = 'lacros_resource_sizes'
    args.output_dir = os.path.join(
        os.path.dirname(args.isolated_script_test_output), test_name)
    if not os.path.exists(args.output_dir):
      os.makedirs(args.output_dir)

  try:
    _run_resource_sizes(args)
    isolated_script_output = {'valid': True, 'failures': []}
  finally:
    if args.isolated_script_test_output:
      results_path = os.path.join(args.output_dir, 'test_results.json')
      with open(results_path, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      with open(args.isolated_script_test_output, 'w') as output_file:
        json.dump(isolated_script_output, output_file)
      result_sink_client = result_sink.TryInitClient()
      if result_sink_client:
        status = result_types.PASS
        if not isolated_script_output['valid']:
          status = result_types.UNKNOWN
        elif isolated_script_output['failures']:
          status = result_types.FAIL
        result_sink_client.Post(test_name, status, None, None, None)


if __name__ == '__main__':
  main()