lto-pgo.configure (15491B)
# -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
# vim: set filetype=python:
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

# PGO
# ==============================================================

# --enable-profile-generate[=cross]: build the instrumented binary that is
# later run to collect the profile. The optional "cross" value is consumed
# further down when deciding whether Rust takes part in PGO.
option(
    "--enable-profile-generate",
    env="MOZ_PROFILE_GENERATE",
    nargs="?",
    choices=("cross",),
    help="Build a PGO instrumented binary",
)

# Collapse the option to a plain True when it is enabled, whatever value (if
# any) was passed along with it.
enable_profile_generate = depends_if("--enable-profile-generate")(lambda _: True)

imply_option("MOZ_PGO", enable_profile_generate)
set_config("MOZ_PROFILE_GENERATE", enable_profile_generate)
set_define("MOZ_PROFILE_GENERATE", enable_profile_generate)

# --enable-profile-use[=cross]: build using a previously generated profile.
option(
    "--enable-profile-use",
    env="MOZ_PROFILE_USE",
    nargs="?",
    choices=("cross",),
    help="Use a generated profile during the build",
)

# Same normalisation as for --enable-profile-generate above.
enable_profile_use = depends_if("--enable-profile-use")(lambda _: True)

imply_option("MOZ_PGO", enable_profile_use)
set_config("MOZ_PROFILE_USE", enable_profile_use)

# llvm-profdata is optional at this point (allow_missing=True); the code that
# actually needs it checks for its presence itself.
llvm_profdata = check_prog(
    "LLVM_PROFDATA",
    ["llvm-profdata"],
    allow_missing=True,
    paths=clang_search_path,
)


# --call-graph-profile-sort is the default behavior for lld, and it proves to
# be more efficient than a PGO-based orderfile. Don't pass it explicitly,
# because support for the option differs from one lld version to the other
# (w/ or w/o an argument, and the accepted argument values).
@depends(select_linker, target)
def pgo_cg_sort(linker, target):
    # Truthy when linking with lld for a non-OSX target, i.e. when lld's own
    # call-graph profile sort is relied upon instead of a PGO-based orderfile.
    return linker and linker.KIND == "lld" and target.os != "OSX"


# Only evaluated for profile-use builds that do not rely on lld's call-graph
# sort, and only when llvm-profdata was found (depends_if). The result is True
# when `llvm-profdata order --help` exits with status 0.
@depends_if(llvm_profdata, when=enable_profile_use & ~pgo_cg_sort)
@checking("whether llvm-profdata supports 'order' subcommand")
def llvm_profdata_order(profdata):
    retcode, _, _ = get_cmd_output(profdata, "order", "--help")
    return retcode == 0


option(
    "--with-pgo-profile-path",
    help="Path to the directory with unmerged profile data to use during the build"
    ", or to a merged profdata file",
    nargs=1,
)


# Resolve the profile data location used by profile-use builds.
#
# Without --with-pgo-profile-path this is <topobjdir>/instrumented/
# merged.profdata, where a trailing "/js/src" is stripped from topobjdir
# first. With the option, the argument is validated and returned as-is.
@depends(
    "--with-pgo-profile-path",
    "--enable-profile-use",
    llvm_profdata,
    build_environment,
)
@imports("os")
def pgo_profile_path(path, pgo_use, profdata, build_env):
    if not path:
        # The default location is only needed (and only computed) when no
        # explicit path was given.
        topobjdir = build_env.topobjdir
        js_src_suffix = "/js/src"
        if topobjdir.endswith(js_src_suffix):
            topobjdir = topobjdir[: -len(js_src_suffix)]
        return os.path.join(topobjdir, "instrumented", "merged.profdata")

    # From here on an explicit path was passed on the command line.
    if not pgo_use:
        die("Pass --enable-profile-use to use --with-pgo-profile-path.")
    if not profdata:
        die("LLVM_PROFDATA must be set to process the pgo profile.")

    profile = path[0]
    # NOTE(review): the option's help text also mentions a directory of
    # unmerged profile data, but only regular files are accepted here --
    # confirm which of the two is intended.
    if not os.path.isfile(profile):
        die("Argument to --with-pgo-profile-path must be a file.")
    if not os.path.isabs(profile):
        die("Argument to --with-pgo-profile-path must be an absolute path.")
    return profile


set_config("PGO_PROFILE_PATH", pgo_profile_path)


# Generate <topobjdir>/orderfile.txt from the profile with
# `llvm-profdata order` and return its path. Returns None when not doing a
# profile-use build or when the `order` subcommand is not available.
@depends(
    "--enable-profile-use",
    pgo_profile_path,
    llvm_profdata,
    llvm_profdata_order,
    build_environment,
)
def orderfile_path(profile_use, path, profdata, profdata_order, build_env):
    if not profile_use:
        return None

    if not profdata_order:
        return None

    topobjdir = build_env.topobjdir

    orderfile = os.path.join(topobjdir, "orderfile.txt")
    check_cmd_output(profdata, "order", path, "-o", orderfile)
    return orderfile


# Temporal instrumentation is only probed for instrumented builds that do not
# rely on lld's call-graph sort.
pgo_temporal = c_compiler.try_compile(
    flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"],
    check_msg="whether the C compiler supports temporal instrumentation",
    when=enable_profile_generate & ~pgo_cg_sort,
)


# Compute the compiler and linker flags for both PGO phases.
#
# Returns a namespace with gen_cflags / gen_ldflags (instrumented build) and
# use_cflags / use_ldflags (profile-use build). Compilers other than gcc,
# clang and clang-cl fall off the end of the function and yield None.
#
# Note that the `profdata` parameter receives the value of pgo_profile_path
# (the profile data location), not the llvm-profdata program.
@depends(
    c_compiler,
    select_linker,
    target,
    pgo_profile_path,
    target_is_windows,
    pgo_temporal,
    orderfile_path,
)
def pgo_flags(
    compiler,
    linker,
    target,
    profdata,
    target_is_windows,
    pgo_temporal,
    orderfile,
):
    if compiler.type == "gcc":
        return namespace(
            gen_cflags=["-fprofile-generate"],
            gen_ldflags=["-fprofile-generate"],
            use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"],
            use_ldflags=["-fprofile-use"],
        )

    if compiler.type in ("clang-cl", "clang"):
        prefix = ""
        if compiler.type == "clang-cl":
            # clang-cl needs the /clang: prefix on the profile flags and gets
            # no linker flag for the instrumented build.
            prefix = "/clang:"
            gen_ldflags = None
        else:
            gen_ldflags = ["-fprofile-generate"]

        # Each linker flavour has its own way of consuming the orderfile.
        use_ldflags = []
        if orderfile:
            if compiler.type == "clang-cl":
                use_ldflags += [
                    "-ORDER:@" + orderfile,
                    "/ignore:4037",  # Disable warn missing order symbol
                ]
            elif linker.KIND == "ld64" or (linker.KIND == "lld" and target.os == "OSX"):
                use_ldflags += ["-Wl,-order_file", orderfile]
            elif linker.KIND == "lld":
                use_ldflags += [
                    "-Wl,--symbol-ordering-file",
                    orderfile,
                    "-Wl,--no-warn-symbol-ordering",
                ]

        if use_ldflags:
            log.info("Activating PGO-based orderfile")

        gen_cflags = [prefix + "-fprofile-generate"]

        if pgo_temporal:
            gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"]

        if target_is_windows:
            # native llvm-profdata.exe on Windows can't read profile data
            # if name compression is enabled (which cross-compiling enables
            # by default)
            gen_cflags += ["-mllvm", "-enable-name-compression=false"]

        return namespace(
            gen_cflags=gen_cflags,
            gen_ldflags=gen_ldflags,
            use_cflags=[
                prefix + "-fprofile-use=%s" % profdata,
                # Some error messages about mismatched profile data
                # come in via -Wbackend-plugin, so disable those too.
                "-Wno-error=backend-plugin",
            ],
            use_ldflags=use_ldflags,
        )


set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags)
set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags)
set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags)
set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags)

option(
    "--with-pgo-jarlog",
    help="Use the provided jarlog file when packaging during a profile-use build",
    nargs=1,
)

set_config("PGO_JARLOG_PATH", depends_if("--with-pgo-jarlog")(lambda p: p))


# Decide whether Rust code takes part in PGO. Returns True only when one of
# the --enable-profile-* options was given the "cross" value; returns None
# otherwise. Dies when both options are given at once, or when cross-language
# PGO is requested with GCC.
@depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler)
def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler):
    if not pgo:
        return

    # Enabling PGO through MOZ_PGO only and not --enable* flags.
    if not profile_use and not profile_generate:
        return

    if profile_use and profile_generate:
        die("Cannot build with --enable-profile-use and --enable-profile-generate.")

    want_cross = (len(profile_use) and profile_use[0] == "cross") or (
        len(profile_generate) and profile_generate[0] == "cross"
    )

    if not want_cross:
        return

    if c_compiler.type == "gcc":
        die("Cannot use cross-language PGO with GCC.")

    return True


set_config("MOZ_PGO_RUST", moz_pgo_rust)

# LTO
# ==============================================================

option(
    "--enable-lto",
    env="MOZ_LTO",
    nargs="*",
    choices=("full", "thin", "cross"),
    help="Enable LTO",
)

option(
    env="MOZ_LD64_KNOWN_GOOD",
    nargs=1,
    help="Indicate that ld64 is free of symbol aliasing bugs",
)

imply_option("MOZ_LD64_KNOWN_GOOD", moz_automation)

# Fat LTO objects are only probed when linking with lld, with LTO requested
# and tests enabled.
use_fat_lto = cxx_compiler.try_link(
    ldflags=depends(stlport_libs)(
        lambda extra: ["-flto", "-ffat-lto-objects"] + (extra or [])
    ),
    check_msg="whether the C++ compiler supports fat lto objects",
    when=depends(select_linker, "--enable-lto", enable_tests)(
        lambda linker, lto, tests: linker and linker.KIND == "lld" and lto and tests
    ),
)


# Compute the LTO configuration.
#
# Returns None when LTO is not requested or when building an instrumented
# (--enable-profile-generate) binary. Otherwise returns a namespace with:
#   enabled   always True
#   fat       result of the fat LTO objects check
#   cflags    compiler flags enabling LTO
#   ldflags   linker flags enabling and tuning LTO
#   rust_lto  "full" or "thin" for cross-language LTO, "" otherwise
@depends(
    "--enable-lto",
    c_compiler,
    select_linker,
    "MOZ_LD64_KNOWN_GOOD",
    target,
    "--enable-profile-generate",
    pass_manager.enabled,
    "--enable-profile-use",
    moz_automation,
    use_fat_lto,
)
@imports("multiprocessing")
def lto(
    values,
    c_compiler,
    select_linker,
    ld64_known_good,
    target,
    instrumented_build,
    pass_manager,
    pgo_build,
    moz_automation,
    fat,
):
    cflags = []
    ldflags = []

    if not values:
        return

    # Sanitize LTO modes.
    if "full" in values and "thin" in values:
        die("incompatible --enable-lto choices 'full' and 'thin'")

    # If a value was given to --enable-lto, use that. Otherwise, make the lto
    # mode explicit, using full with gcc, and full or thin with clang depending
    # on the performance benefit.
    # Defaulting to full LTO is costly in terms of compilation time, so we only
    # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms.
    # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and
    # Windows for x86_64 targets.
    if values == () or values == ("cross",):
        if c_compiler.type == "gcc":
            values += ("full",)
        elif (
            pgo_build
            and moz_automation
            and target.os in ("WINNT", "GNU")
            and target.cpu == "x86_64"
        ):
            values += ("full",)
        else:
            values += ("thin",)

    if instrumented_build:
        log.warning("Disabling LTO because --enable-profile-generate is specified")
        return

    if c_compiler.type == "gcc":
        if "cross" in values:
            die("Cross-language LTO is not supported with GCC.")
        if "thin" in values:
            die(
                "gcc does not support thin LTO. Use `--enable-lto` "
                "to enable full LTO for gcc."
            )

    if (
        target.kernel == "Darwin"
        and "cross" in values
        and select_linker.KIND == "ld64"
        and not ld64_known_good
    ):
        die(
            "The Mac linker is known to have a bug that affects cross-language "
            "LTO.  If you know that your linker is free from this bug, please "
            "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run "
            "configure."
        )

    if c_compiler.type == "clang":
        if "full" in values:
            cflags.append("-flto")
            ldflags.append("-flto")
        else:
            cflags.append("-flto=thin")
            ldflags.append("-flto=thin")

        if target.os == "Android" and "cross" in values:
            # Work around https://github.com/rust-lang/rust/issues/90088
            # by enabling the highest level of SSE the rust targets default
            # to.
            # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13
            # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7
            if target.cpu == "x86":
                ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3")
            elif target.cpu == "x86_64":
                ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2")
    elif c_compiler.type == "clang-cl":
        if "full" in values:
            cflags.append("-flto")
        else:
            cflags.append("-flto=thin")
        # With clang-cl, -flto can only be used with -c or -fuse-ld=lld.
        # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld.
        cflags.append("-fuse-ld=lld")

        # Explicitly set the CPU to optimize for so the linker doesn't
        # choose a poor default.  Rust compilation by default uses the
        # pentium4 CPU on x86:
        #
        # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5
        #
        # which specifically supports "long" (multi-byte) nops.  See
        # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details.
        #
        # The pentium4 seems like kind of a weird CPU to optimize for, but
        # it seems to have worked out OK thus far.  LLVM does not seem to
        # specifically schedule code for the pentium4's deep pipeline, so
        # that probably contributes to it being an OK default for our
        # purposes.
        if target.cpu == "x86":
            ldflags.append("-mllvm:-mcpu=pentium4")
        # This is also the CPU that Rust uses.  The LLVM source code
        # recommends this as the "generic 64-bit specific x86 processor model":
        #
        # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187
        if target.cpu == "x86_64":
            ldflags.append("-mllvm:-mcpu=x86-64")
        # We do not need special flags for arm64.  Hooray for fixed-length
        # instruction sets.
    else:
        # Every other compiler type: the link step gets -flto=<cpu count>.
        num_cores = multiprocessing.cpu_count()
        cflags.append("-flto")
        cflags.append("-flifetime-dse=1")

        ldflags.append("-flto=%s" % num_cores)
        ldflags.append("-flifetime-dse=1")

    # Tell LTO not to inline functions above a certain size, to mitigate
    # binary size growth while still getting good performance.
    # (For hot functions, PGO will put a multiplier on this limit.)
    if c_compiler.type == "clang-cl":
        ldflags.append("-mllvm:-import-instr-limit=10")
    elif target.kernel == "Darwin":
        ldflags.append("-Wl,-mllvm,-import-instr-limit=10")
    elif c_compiler.type == "clang":
        ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10")

    # If we're using the new pass manager, we can also enable the new PM
    # during LTO. Further we can use the resulting size savings to increase
    # the import limit in hot functions.
    if pass_manager:
        if c_compiler.type == "clang-cl":
            if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0":
                ldflags.append("-opt:ltonewpassmanager")
            if c_compiler.version >= "12.0.0":
                ldflags.append("-mllvm:-import-hot-multiplier=30")
        elif target.kernel == "Darwin":
            ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30")
        else:
            if c_compiler.version < "13.0.0":
                ldflags.append("-Wl,-plugin-opt=new-pass-manager")
            ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30")

    # Pick Rust LTO mode in case of cross LTO. Thin is the default.
    if "cross" in values:
        rust_lto = "full" if "full" in values else "thin"
    else:
        rust_lto = ""

    if fat:
        cflags.append("-ffat-lto-objects")
        ldflags.append("-ffat-lto-objects")

    return namespace(
        enabled=True,
        fat=fat,
        cflags=cflags,
        ldflags=ldflags,
        rust_lto=rust_lto,
    )


# Appends -Wl,--gc-sections to the DSO link options. Only evaluated for
# GNU-compatible compilers using GNU ld, outside of developer-option and
# instrumented (profile-generate) builds.
@depends(
    dso_flags,
    when=building_with_gnu_compatible_cc
    & gcc_use_gnu_ld
    & ~developer_options
    & ~enable_profile_generate,
)
def remove_dead_symbols(dso_flags):
    dso_flags.ldopts.append("-Wl,--gc-sections")


set_config("MOZ_LTO", lto.enabled)
set_define("MOZ_LTO", lto.enabled)
set_config("MOZ_LTO_CFLAGS", lto.cflags)
set_config("MOZ_LTO_LDFLAGS", lto.ldflags)
set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto)
set_config("MOZ_LTO_FAT", lto.fat)