create_app_bundle.py (23608B)
#!/usr/bin/env python3
#
# Copyright 2018 The Chromium Authors
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

"""Create an Android application bundle from one or more bundle modules."""

import argparse
import concurrent.futures
import json
import logging
import os
import posixpath
import shutil
import sys
from xml.etree import ElementTree
import zipfile

sys.path.append(
    os.path.abspath(os.path.join(os.path.dirname(__file__), os.pardir)))
from pylib.utils import dexdump

import bundletool
from util import build_utils
from util import manifest_utils
from util import resource_utils
import action_helpers  # build_utils adds //build to sys.path.
import zip_helpers


# Location of language-based assets in bundle modules.
_LOCALES_SUBDIR = 'assets/locales/'

# The fallback locale should always have its .pak file included in
# the base apk, i.e. not use language-based asset targeting. This ensures
# that Chrome won't crash on startup if its bundle is installed on a device
# with an unsupported system locale (e.g. fur-rIT).
_FALLBACK_LOCALE = 'en-US'

# List of split dimensions recognized by this tool.
_ALL_SPLIT_DIMENSIONS = [ 'ABI', 'SCREEN_DENSITY', 'LANGUAGE' ]

# Due to historical reasons, certain languages identified by Chromium with a
# 3-letters ISO 639-2 code, are mapped to a nearly equivalent 2-letters
# ISO 639-1 code instead (due to the fact that older Android releases only
# supported the latter when matching resources).
#
# NOTE(review): the original comment was truncated here; presumably this is
# the same conversion as the one applied to Java resources -- confirm.
_SHORTEN_LANGUAGE_CODE_MAP = {
  'fil': 'tl',  # Filipino to Tagalog.
}

# A list of extensions corresponding to files that should never be compressed
# in the bundle. This used to be handled by bundletool automatically until
# release 0.8.0, which required that this be passed to the BundleConfig
# file instead.
#
# This is the original list, which was taken from aapt2, with 'webp' added to
# it (which curiously was missing from the list).
#
# NOTE(review): 'git' below looks like a typo for 'gif' -- confirm against
# the aapt2 list before changing it, since doing so alters which files are
# stored uncompressed.
_UNCOMPRESSED_FILE_EXTS = [
    '3g2', '3gp', '3gpp', '3gpp2', 'aac', 'amr', 'awb', 'git', 'imy', 'jet',
    'jpeg', 'jpg', 'm4a', 'm4v', 'mid', 'midi', 'mkv', 'mp2', 'mp3', 'mp4',
    'mpeg', 'mpg', 'ogg', 'png', 'rtttl', 'smf', 'wav', 'webm', 'webp', 'wmv',
    'xmf'
]

_COMPONENT_TYPES = ('activity', 'provider', 'receiver', 'service')
_DEDUPE_ENTRY_TYPES = _COMPONENT_TYPES + ('activity-alias', 'meta-data')

_ROTATION_METADATA_KEY = 'com.google.play.apps.signing/RotationConfig.textproto'

_ALLOWLISTED_NON_BASE_SERVICES = {
    # Only on API level 33+ which is past the fix for b/169196314.
    'androidx.pdf.service.PdfDocumentServiceImpl',
    'androidx.pdf.service.PdfDocumentService',
    # These need to be burned down - these have likely never fully worked.
    'com.google.apps.tiktok.concurrent.AndroidFuturesService',
    'com.google.apps.tiktok.concurrent.InternalForegroundService',
}


def _ParseArgs(args):
  """Parse and validate the command line.

  Args:
    args: List of command-line arguments (without the program name).

  Returns:
    The argparse namespace, post-processed as follows: GN-lists are expanded
    into Python lists, the per-module lists are re-ordered so that 'base'
    comes first followed by the other modules sorted by name, and
    |uncompressed_assets| is flattened into a set of zip paths.

  Invalid input is reported through parser.error(), which exits the process.
  """
  parser = argparse.ArgumentParser()
  parser.add_argument('--out-bundle', required=True,
                      help='Output bundle zip archive.')
  parser.add_argument('--module-zips', required=True,
                      help='GN-list of module zip archives.')
  parser.add_argument(
      '--pathmap-in-paths',
      action='append',
      help='List of module pathmap files.')
  parser.add_argument(
      '--module-name',
      action='append',
      dest='module_names',
      help='List of module names.')
  parser.add_argument(
      '--pathmap-out-path', help='Path to combined pathmap file for bundle.')
  parser.add_argument(
      '--rtxt-in-paths', action='append', help='GN-list of module R.txt files.')
  parser.add_argument(
      '--rtxt-out-path', help='Path to combined R.txt file for bundle.')
  parser.add_argument('--uncompressed-assets', action='append',
                      help='GN-list of uncompressed assets.')
  parser.add_argument('--compress-dex',
                      action='store_true',
                      help='Compress .dex files')
  parser.add_argument('--split-dimensions',
                      help="GN-list of split dimensions to support.")
  parser.add_argument(
      '--base-module-rtxt-path',
      help='Optional path to the base module\'s R.txt file, only used with '
      'language split dimension.')
  parser.add_argument(
      '--base-allowlist-rtxt-path',
      help='Optional path to an R.txt file, string resources '
      'listed there _and_ in --base-module-rtxt-path will '
      'be kept in the base bundle module, even if language'
      ' splitting is enabled.')
  parser.add_argument('--rotation-config',
                      help='Path to a RotationConfig.textproto')
  parser.add_argument('--warnings-as-errors',
                      action='store_true',
                      help='Treat all warnings as errors.')

  parser.add_argument(
      '--validate-services',
      action='store_true',
      help='Check if services are in base module if isolatedSplits is enabled.')

  options = parser.parse_args(args)
  options.module_zips = action_helpers.parse_gn_list(options.module_zips)

  if not options.module_zips:
    parser.error('The module zip list cannot be empty.')
  # --module-name is an 'append' flag, so it is None when never passed.
  if len(options.module_zips) != len(options.module_names or []):
    parser.error('# module zips != # names.')
  if 'base' not in options.module_names:
    parser.error('Missing base module.')

  # zip() below stops at its shortest input, so a per-module flag that was
  # passed too few times would silently drop modules from the bundle (and a
  # flag that was never passed is None, which zip() cannot iterate).
  for flag, values in (('--uncompressed-assets', options.uncompressed_assets),
                       ('--rtxt-in-paths', options.rtxt_in_paths),
                       ('--pathmap-in-paths', options.pathmap_in_paths)):
    if len(values or []) < len(options.module_names):
      parser.error('%s must be passed once per module.' % flag)

  # Sort modules for more stable outputs.
  per_module_values = list(
      zip(options.module_names, options.module_zips,
          options.uncompressed_assets, options.rtxt_in_paths,
          options.pathmap_in_paths))
  # 'base' sorts first (False < True), the others alphabetically.
  per_module_values.sort(key=lambda x: (x[0] != 'base', x[0]))
  options.module_names = [x[0] for x in per_module_values]
  options.module_zips = [x[1] for x in per_module_values]
  options.uncompressed_assets = [x[2] for x in per_module_values]
  options.rtxt_in_paths = [x[3] for x in per_module_values]
  options.pathmap_in_paths = [x[4] for x in per_module_values]

  options.rtxt_in_paths = action_helpers.parse_gn_list(options.rtxt_in_paths)
  options.pathmap_in_paths = action_helpers.parse_gn_list(
      options.pathmap_in_paths)

  # Merge all uncompressed assets into a set.
  uncompressed_list = []
  for entry in action_helpers.parse_gn_list(options.uncompressed_assets):
    # Each entry has the following format: 'zipPath' or 'srcPath:zipPath'
    pos = entry.find(':')
    if pos >= 0:
      uncompressed_list.append(entry[pos + 1:])
    else:
      uncompressed_list.append(entry)

  options.uncompressed_assets = set(uncompressed_list)

  # Check that all split dimensions are valid
  if options.split_dimensions:
    options.split_dimensions = action_helpers.parse_gn_list(
        options.split_dimensions)
    for dim in options.split_dimensions:
      if dim.upper() not in _ALL_SPLIT_DIMENSIONS:
        parser.error('Invalid split dimension "%s" (expected one of: %s)' % (
            dim, ', '.join(x.lower() for x in _ALL_SPLIT_DIMENSIONS)))

  # As a special case, --base-allowlist-rtxt-path can be empty to indicate
  # that the module doesn't need such a allowlist. That's because it is easier
  # to check this condition here than through GN rules :-(
  if options.base_allowlist_rtxt_path == '':
    options.base_module_rtxt_path = None

  # Check --base-module-rtxt-path and --base-allowlist-rtxt-path usage.
  if options.base_module_rtxt_path:
    if not options.base_allowlist_rtxt_path:
      parser.error(
          '--base-module-rtxt-path requires --base-allowlist-rtxt-path')
    # Dimensions are validated case-insensitively above, so compare the same
    # way here; also tolerate --split-dimensions not being passed at all.
    requested_dimensions = {
        dim.upper() for dim in options.split_dimensions or []
    }
    if 'LANGUAGE' not in requested_dimensions:
      parser.error('--base-module-rtxt-path is only valid with '
                   'language-based splits.')

  return options


def _MakeSplitDimension(value, enabled):
  """Return dict modelling a BundleConfig splitDimension entry."""
  return {'value': value, 'negate': not enabled}


def _GenerateBundleConfigJson(uncompressed_assets, compress_dex,
                              split_dimensions, base_master_resource_ids):
  """Generate the JSON text of a BundleConfig.

  Args:
    uncompressed_assets: A list or set of file paths under assets/ that always
      be stored uncompressed.
    compress_dex: Boolean, whether to compress .dex.
    split_dimensions: list of split dimensions (upper-case names, as found in
      _ALL_SPLIT_DIMENSIONS).
    base_master_resource_ids: Optional list of 32-bit resource IDs to keep
      inside the base module, even when split dimensions are enabled.
  Returns:
    A JSON-formatted string that can be written as a json file.
  """
  # Compute splitsConfig list. Each item is a dictionary that can have
  # the following keys:
  #    'value': One of the names in _ALL_SPLIT_DIMENSIONS.
  #    'negate': Boolean, True to indicate that the bundle should *not* be
  #              split along this dimension.
  split_config = [
      _MakeSplitDimension(dim, dim in split_dimensions)
      for dim in _ALL_SPLIT_DIMENSIONS
  ]

  # Locale-specific pak files stored in bundle splits need not be compressed.
  uncompressed_globs = [
      'assets/locales#lang_*/*.pak', 'assets/fallback-locales/*.pak'
  ]
  # normpath to allow for ../ prefix.
  uncompressed_globs.extend(
      posixpath.normpath('assets/' + x) for x in uncompressed_assets)
  # NOTE: Use '**' instead of '*' to work through directories!
  uncompressed_globs.extend('**.' + ext for ext in _UNCOMPRESSED_FILE_EXTS)
  if not compress_dex:
    # Explicit glob required only when using bundletool to create .apks files.
    # Play Store looks for and respects "uncompressDexFiles" set below.
    # b/176198991
    # This is added as a placeholder entry in order to have no effect unless
    # processed with app_bundle_utils.GenerateBundleApks().
    uncompressed_globs.append('classesX.dex')

  data = {
      'optimizations': {
          'splitsConfig': {
              'splitDimension': split_config,
          },
          'uncompressNativeLibraries': {
              'enabled': True,
              'alignment': 'PAGE_ALIGNMENT_16K'
          },
          'uncompressDexFiles': {
              'enabled': True,  # Applies only for P+.
          }
      },
      'compression': {
          'uncompressedGlob': sorted(uncompressed_globs),
      },
  }

  if base_master_resource_ids:
    data['master_resources'] = {
        'resource_ids': list(base_master_resource_ids),
    }

  return json.dumps(data, indent=2)


def _RewriteLanguageAssetPath(src_path):
  """Rewrite the destination path of a locale asset for language-based splits.

  Should only be used when generating bundles with language-based splits.
  This will rewrite paths that look like locales/<locale>.pak into
  locales#<language>/<locale>.pak, where <language> is the language code
  from the locale.

  Returns new path (a string). Paths that are not .pak files directly
  identified under _LOCALES_SUBDIR are returned unchanged.
  """
  if not src_path.startswith(_LOCALES_SUBDIR) or not src_path.endswith('.pak'):
    # Must be a plain string, like the rewritten result below: the caller
    # uses the return value directly as a zip entry name.
    return src_path

  locale = src_path[len(_LOCALES_SUBDIR):-4]
  android_locale = resource_utils.ToAndroidLocaleName(locale)

  # The locale format is <lang>-<region> or <lang> or BCP-47 (e.g b+sr+Latn).
  # Extract the language.
  pos = android_locale.find('-')
  if android_locale.startswith('b+'):
    # If locale is in BCP-47 the language is the second tag (e.g. b+sr+Latn)
    android_language = android_locale.split('+')[1]
  elif pos >= 0:
    android_language = android_locale[:pos]
  else:
    android_language = android_locale

  if locale == _FALLBACK_LOCALE:
    # Fallback locale .pak files must be placed in a different directory
    # to ensure they are always stored in the base module.
    result_path = 'assets/fallback-locales/%s.pak' % locale
  else:
    # Other language .pak files go into a language-specific asset directory
    # that bundletool will store in separate split APKs.
    result_path = 'assets/locales#lang_%s/%s.pak' % (android_language, locale)

  return result_path


def _SplitModuleForAssetTargeting(src_module_zip, tmp_dir, split_dimensions):
  """Splits assets in a module if needed.

  Args:
    src_module_zip: input zip module path.
    tmp_dir: Path to temporary directory, where the new output module might
      be written to.
    split_dimensions: list of split dimensions.

  Returns:
    If the module doesn't need asset targeting, doesn't do anything and
    returns src_module_zip. Otherwise, create a new module zip archive under
    tmp_dir with the same file name, but which contains assets paths targeting
    the proper dimensions.
  """
  split_language = 'LANGUAGE' in split_dimensions
  if not split_language:
    # Nothing to target, so return original module path.
    return src_module_zip

  with zipfile.ZipFile(src_module_zip, 'r') as src_zip:
    # A set keeps the per-entry membership test in the copy loop O(1).
    language_files = {
        f for f in src_zip.namelist() if f.startswith(_LOCALES_SUBDIR)
    }

    if not language_files:
      # Not language-based assets to split in this module.
      return src_module_zip

    tmp_zip = os.path.join(tmp_dir, os.path.basename(src_module_zip))
    with zipfile.ZipFile(tmp_zip, 'w') as dst_zip:
      for info in src_zip.infolist():
        src_path = info.filename
        # Preserve each entry's stored/compressed state in the copy.
        is_compressed = info.compress_type != zipfile.ZIP_STORED

        dst_path = src_path
        if src_path in language_files:
          dst_path = _RewriteLanguageAssetPath(src_path)

        zip_helpers.add_to_zip_hermetic(dst_zip,
                                        dst_path,
                                        data=src_zip.read(src_path),
                                        compress=is_compressed)

    return tmp_zip


def _GenerateBaseResourcesAllowList(base_module_rtxt_path,
                                    base_allowlist_rtxt_path):
  """Generate a allowlist of base master resource ids.

  Args:
    base_module_rtxt_path: Path to base module R.txt file.
    base_allowlist_rtxt_path: Path to base allowlist R.txt file.
  Returns:
    list of resource ids.
  """
  ids_map = resource_utils.GenerateStringResourcesAllowList(
      base_module_rtxt_path, base_allowlist_rtxt_path)
  # Materialize the keys so callers get the list the contract promises.
  return list(ids_map.keys())


def _ConcatTextFiles(in_paths, out_path):
  """Concatenate the contents of multiple text files into one.

  Each file's contents are preceded by a line containing the base name of
  the original file. Input paths that do not exist are skipped.

  Args:
    in_paths: List of input file paths.
    out_path: Path to output file.
  """
  with open(out_path, 'w') as out_file:
    for in_path in in_paths:
      if not os.path.exists(in_path):
        continue
      with open(in_path, 'r') as in_file:
        out_file.write('-- Contents of {}\n'.format(os.path.basename(in_path)))
        out_file.write(in_file.read())


def _LoadPathmap(pathmap_path):
  """Load the pathmap of obfuscated resource paths.

  Blank lines and lines starting with '--' are ignored; every other line must
  have the form '<original> -> <renamed>'.

  Returns: A dict mapping from obfuscated paths to original paths or an
           empty dict if passed a None |pathmap_path|.
  """
  if pathmap_path is None:
    return {}

  pathmap = {}
  with open(pathmap_path, 'r') as f:
    for line in f:
      line = line.strip()
      if line.startswith('--') or line == '':
        continue
      original, renamed = line.split(' -> ')
      pathmap[renamed] = original
  return pathmap


def _WriteBundlePathmap(module_pathmap_paths, module_names,
                        bundle_pathmap_path):
  """Combine the contents of module pathmaps into a bundle pathmap.

  This rebases the resource paths inside the module pathmap before adding them
  to the bundle pathmap. So res/a.xml inside the base module pathmap would be
  base/res/a.xml in the bundle pathmap.

  Module pathmap files that do not exist are skipped.

  Args:
    module_pathmap_paths: List of per-module pathmap file paths.
    module_names: List of module names, in the same order as
      |module_pathmap_paths|.
    bundle_pathmap_path: Path of the combined pathmap file to write.
  """
  with open(bundle_pathmap_path, 'w') as bundle_pathmap_file:
    for module_pathmap_path, module_name in zip(module_pathmap_paths,
                                                module_names):
      if not os.path.exists(module_pathmap_path):
        continue
      module_pathmap = _LoadPathmap(module_pathmap_path)
      for short_path, long_path in module_pathmap.items():
        rebased_long_path = '{}/{}'.format(module_name, long_path)
        rebased_short_path = '{}/{}'.format(module_name, short_path)
        line = '{} -> {}\n'.format(rebased_long_path, rebased_short_path)
        bundle_pathmap_file.write(line)


def _GetManifestForModule(bundle_path, module_name):
  """Dump and parse the manifest of one module of a bundle.

  Args:
    bundle_path: Path to the bundle to inspect.
    module_name: Name of the module whose manifest is wanted.

  Returns:
    The root ElementTree element of the manifest dumped by bundletool.

  Raises:
    ElementTree.ParseError: if the dump is not well-formed XML. The raw dump
      is written to stderr first to help debugging.
  """
  data = bundletool.RunBundleTool(
      ['dump', 'manifest', '--bundle', bundle_path, '--module', module_name])
  try:
    return ElementTree.fromstring(data)
  except ElementTree.ParseError:
    sys.stderr.write('Failed to parse:\n')
    sys.stderr.write(data)
    raise


def _GetComponentNames(manifest, tag_name):
  """Return the android:name of each <tag_name> under <application>.

  Args:
    manifest: Root element of a parsed manifest.
    tag_name: Tag to look for, e.g. 'service'.

  Returns:
    A list with one entry per matching element; an entry is None when the
    element has no android:name attribute.
  """
  android_name = '{%s}name' % manifest_utils.ANDROID_NAMESPACE
  return [
      s.attrib.get(android_name)
      for s in manifest.iterfind(f'application/{tag_name}')
  ]


def _ClassesFromZip(module_zip):
  """Return the set of fully-qualified class names dexdump finds in a zip."""
  classes = set()
  for package in dexdump.Dump(module_zip):
    for java_package, package_dict in package.items():
      # Classes in the default (empty) package get no '.' prefix.
      java_package += '.' if java_package else ''
      classes.update(java_package + c for c in package_dict['classes'])
  return classes


def _ValidateSplits(bundle_path, module_zips):
  """Run sanity checks on the manifests and dex of a built bundle.

  Args:
    bundle_path: Path to the bundle produced by bundletool.
    module_zips: Paths of the module zips the bundle was built from. Module
      names are derived from their base names, and one of them must be named
      'base.zip'.

  Returns:
    A list of human-readable error strings; empty when all checks pass.
  """
  logging.info('Reading manifests and running dexdump')
  base_zip = next(p for p in module_zips if os.path.basename(p) == 'base.zip')
  module_names = sorted(os.path.basename(p)[:-len('.zip')] for p in module_zips)
  # Using threads makes these steps go from 7s -> 1s on my machine.
  with concurrent.futures.ThreadPoolExecutor() as executor:
    # Create list of classes from the base module's dex.
    classes_future = executor.submit(_ClassesFromZip, base_zip)

    # Create xmltrees of all module manifests.
    manifest_futures = [
        executor.submit(_GetManifestForModule, bundle_path, n)
        for n in module_names
    ]
    manifests_by_name = dict(
        zip(module_names, (f.result() for f in manifest_futures)))
    base_classes = classes_future.result()

  logging.info('Performing checks')
  errors = []

  # Ensure there are no components defined in multiple splits.
  splits_by_component = {}
  for module_name, cur_manifest in manifests_by_name.items():
    for kind in _DEDUPE_ENTRY_TYPES:
      for component in _GetComponentNames(cur_manifest, kind):
        # The first module seen declaring a component is recorded as owner.
        owner_module_name = splits_by_component.setdefault((kind, component),
                                                           module_name)
        # Allow services that exist only to keep <meta-data> out of
        # ApplicationInfo.
        if (owner_module_name != module_name
            and not component.endswith('HolderService')):
          errors.append(f'The {kind} "{component}" appeared in both '
                        f'{owner_module_name} and {module_name}.')

  # Ensure components defined in base manifest exist in base dex.
  for (kind, component), module_name in splits_by_component.items():
    if module_name == 'base' and kind in _COMPONENT_TYPES:
      if component not in base_classes:
        errors.append(f"{component} is defined in the base manfiest, "
                      f"but the class does not exist in the base splits' dex")

  # Remaining checks apply only when isolatedSplits="true".
  isolated_splits = manifests_by_name['base'].get(
      f'{{{manifest_utils.ANDROID_NAMESPACE}}}isolatedSplits')
  if isolated_splits != 'true':
    return errors

  # Ensure all providers are present in base module. We enforce this because
  # providers are loaded early in startup, and keeping them in the base module
  # gives more time for the chrome split to load.
  for module_name, cur_manifest in manifests_by_name.items():
    if module_name == 'base':
      continue
    provider_names = _GetComponentNames(cur_manifest, 'provider')
    for p in provider_names:
      errors.append(f'Provider {p} should be declared in the base manifest,'
                    f' but is in "{module_name}" module. For details, see '
                    'https://chromium.googlesource.com/chromium/src/+/main/'
                    'docs/android_isolated_splits.md#contentproviders')

  # Ensure all services are present in base module because service classes are
  # not found if they are not present in the base module. b/169196314
  # It is fine if they are defined in split manifests though.
  for module_name, cur_manifest in manifests_by_name.items():
    for service_name in _GetComponentNames(cur_manifest, 'service'):
      if (service_name not in base_classes
          and service_name not in _ALLOWLISTED_NON_BASE_SERVICES):
        errors.append(f'Service {service_name} should be declared in the base'
                      f' manifest, but is in "{module_name}" module. For'
                      ' details, see b/169196314.')

  return errors


def main(args):
  """Build the bundle described by |args| and write the output artifacts.

  Args:
    args: List of command-line arguments (without the program name).

  Exits the process with status 1 when --validate-services is passed and the
  bundle fails validation.
  """
  build_utils.InitLogging('AAB_DEBUG')
  args = build_utils.ExpandFileArgs(args)
  options = _ParseArgs(args)

  split_dimensions = []
  if options.split_dimensions:
    split_dimensions = [x.upper() for x in options.split_dimensions]


  with build_utils.TempDir() as tmp_dir:
    logging.info('Splitting locale assets')
    module_zips = [
        _SplitModuleForAssetTargeting(module, tmp_dir, split_dimensions)
        for module in options.module_zips
    ]

    base_master_resource_ids = None
    if options.base_module_rtxt_path:
      logging.info('Creating R.txt allowlist')
      base_master_resource_ids = _GenerateBaseResourcesAllowList(
          options.base_module_rtxt_path, options.base_allowlist_rtxt_path)

    logging.info('Creating BundleConfig.pb.json')
    bundle_config = _GenerateBundleConfigJson(options.uncompressed_assets,
                                              options.compress_dex,
                                              split_dimensions,
                                              base_master_resource_ids)

    tmp_bundle = os.path.join(tmp_dir, 'tmp_bundle')

    # Important: bundletool requires that the bundle config file is
    # named with a .pb.json extension.
    tmp_bundle_config = tmp_bundle + '.BundleConfig.pb.json'

    with open(tmp_bundle_config, 'w') as f:
      f.write(bundle_config)

    logging.info('Running bundletool')
    cmd_args = build_utils.JavaCmd() + [
        '-jar',
        bundletool.BUNDLETOOL_JAR_PATH,
        'build-bundle',
        '--modules=' + ','.join(module_zips),
        '--output=' + tmp_bundle,
        '--config=' + tmp_bundle_config,
    ]

    if options.rotation_config:
      cmd_args += [
          f'--metadata-file={_ROTATION_METADATA_KEY}:{options.rotation_config}'
      ]

    build_utils.CheckOutput(
        cmd_args,
        print_stdout=True,
        print_stderr=True,
        stderr_filter=build_utils.FilterReflectiveAccessJavaWarnings,
        fail_on_output=options.warnings_as_errors)

    if options.validate_services:
      # TODO(crbug.com/40148088): This step takes 0.4s locally for bundles with
      # isolated splits disabled and 2s for bundles with isolated splits
      # enabled. Consider making this run in parallel or move into a separate
      # step before enabling isolated splits by default.
      logging.info('Validating isolated split manifests')
      errors = _ValidateSplits(tmp_bundle, module_zips)
      if errors:
        sys.stderr.write('Bundle failed sanity checks:\n ')
        sys.stderr.write('\n '.join(errors))
        sys.stderr.write('\n')
        sys.exit(1)

    logging.info('Writing final output artifacts')
    # The bundle is only moved into place once every check has passed.
    shutil.move(tmp_bundle, options.out_bundle)

  if options.rtxt_out_path:
    _ConcatTextFiles(options.rtxt_in_paths, options.rtxt_out_path)

  if options.pathmap_out_path:
    _WriteBundlePathmap(options.pathmap_in_paths, options.module_names,
                        options.pathmap_out_path)


if __name__ == '__main__':
  main(sys.argv[1:])