tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

lto-pgo.configure (15491B)


      1 # -*- Mode: python; indent-tabs-mode: nil; tab-width: 40 -*-
      2 # vim: set filetype=python:
      3 # This Source Code Form is subject to the terms of the Mozilla Public
      4 # License, v. 2.0. If a copy of the MPL was not distributed with this
      5 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      6 
      7 # PGO
      8 # ==============================================================
      9 
     10 
     11 option(
     12     "--enable-profile-generate",
     13     env="MOZ_PROFILE_GENERATE",
     14     nargs="?",
     15     choices=("cross",),
     16     help="Build a PGO instrumented binary",
     17 )
     18 
     19 enable_profile_generate = depends_if("--enable-profile-generate")(lambda _: True)
     20 
     21 
     22 imply_option("MOZ_PGO", enable_profile_generate)
     23 
     24 set_config("MOZ_PROFILE_GENERATE", enable_profile_generate)
     25 
     26 set_define("MOZ_PROFILE_GENERATE", enable_profile_generate)
     27 
     28 option(
     29     "--enable-profile-use",
     30     env="MOZ_PROFILE_USE",
     31     nargs="?",
     32     choices=("cross",),
     33     help="Use a generated profile during the build",
     34 )
     35 enable_profile_use = depends_if("--enable-profile-use")(lambda _: True)
     36 
     37 imply_option("MOZ_PGO", enable_profile_use)
     38 set_config("MOZ_PROFILE_USE", enable_profile_use)
     39 
     40 llvm_profdata = check_prog(
     41     "LLVM_PROFDATA", ["llvm-profdata"], allow_missing=True, paths=clang_search_path
     42 )
     43 
     44 
     45 # --call-graph-profile-sort is the default behavior for lld, and it proves to be
     46 # more efficient than pgo-based orderfile. Don't pass it explicitly because it's
     47 # option support differ from one version to the other (w/ or W: argument and the
     48 # argument value).
     49 @depends(select_linker, target)
     50 def pgo_cg_sort(linker, target):
     51     return linker and linker.KIND == "lld" and target.os != "OSX"
     52 
     53 
     54 @depends_if(llvm_profdata, when=enable_profile_use & ~pgo_cg_sort)
     55 @checking("whether llvm-profdata supports 'order' subcommand")
     56 def llvm_profdata_order(profdata):
     57     retcode, _, _ = get_cmd_output(profdata, "order", "--help")
     58     return retcode == 0
     59 
     60 
     61 option(
     62     "--with-pgo-profile-path",
     63     help="Path to the directory with unmerged profile data to use during the build"
     64     ", or to a merged profdata file",
     65     nargs=1,
     66 )
     67 
     68 
     69 @depends(
     70     "--with-pgo-profile-path",
     71     "--enable-profile-use",
     72     llvm_profdata,
     73     build_environment,
     74 )
     75 @imports("os")
     76 def pgo_profile_path(path, pgo_use, profdata, build_env):
     77     topobjdir = build_env.topobjdir
     78     if topobjdir.endswith("/js/src"):
     79         topobjdir = topobjdir[:-7]
     80 
     81     if not path:
     82         return os.path.join(topobjdir, "instrumented", "merged.profdata")
     83     if path and not pgo_use:
     84         die("Pass --enable-profile-use to use --with-pgo-profile-path.")
     85     if path and not profdata:
     86         die("LLVM_PROFDATA must be set to process the pgo profile.")
     87     if not os.path.isfile(path[0]):
     88         die("Argument to --with-pgo-profile-path must be a file.")
     89     if not os.path.isabs(path[0]):
     90         die("Argument to --with-pgo-profile-path must be an absolute path.")
     91     return path[0]
     92 
     93 
     94 set_config("PGO_PROFILE_PATH", pgo_profile_path)
     95 
     96 
     97 @depends(
     98     "--enable-profile-use",
     99     pgo_profile_path,
    100     llvm_profdata,
    101     llvm_profdata_order,
    102     build_environment,
    103 )
    104 def orderfile_path(profile_use, path, profdata, profdata_order, build_env):
    105     if not profile_use:
    106         return None
    107 
    108     if not profdata_order:
    109         return None
    110 
    111     topobjdir = build_env.topobjdir
    112 
    113     orderfile = os.path.join(topobjdir, "orderfile.txt")
    114     check_cmd_output(profdata, "order", path, "-o", orderfile)
    115     return orderfile
    116 
    117 
    118 pgo_temporal = c_compiler.try_compile(
    119     flags=["-fprofile-generate", "-mllvm", "-pgo-temporal-instrumentation"],
    120     check_msg="whether the C compiler supports temporal instrumentation",
    121     when=enable_profile_generate & ~pgo_cg_sort,
    122 )
    123 
    124 
    125 @depends(
    126     c_compiler,
    127     select_linker,
    128     target,
    129     pgo_profile_path,
    130     target_is_windows,
    131     pgo_temporal,
    132     orderfile_path,
    133 )
    134 @imports("multiprocessing")
    135 def pgo_flags(
    136     compiler,
    137     linker,
    138     target,
    139     profdata,
    140     target_is_windows,
    141     pgo_temporal,
    142     orderfile,
    143 ):
    144     if compiler.type == "gcc":
    145         return namespace(
    146             gen_cflags=["-fprofile-generate"],
    147             gen_ldflags=["-fprofile-generate"],
    148             use_cflags=["-fprofile-use", "-fprofile-correction", "-Wcoverage-mismatch"],
    149             use_ldflags=["-fprofile-use"],
    150         )
    151 
    152     if compiler.type in ("clang-cl", "clang"):
    153         prefix = ""
    154         if compiler.type == "clang-cl":
    155             prefix = "/clang:"
    156             gen_ldflags = None
    157         else:
    158             gen_ldflags = ["-fprofile-generate"]
    159 
    160         use_ldflags = []
    161         if orderfile:
    162             if compiler.type == "clang-cl":
    163                 use_ldflags += [
    164                     "-ORDER:@" + orderfile,
    165                     "/ignore:4037",  # Disable warn missing order symbol
    166                 ]
    167             elif linker.KIND == "ld64" or (linker.KIND == "lld" and target.os == "OSX"):
    168                 use_ldflags += ["-Wl,-order_file", orderfile]
    169             elif linker.KIND == "lld":
    170                 use_ldflags += [
    171                     "-Wl,--symbol-ordering-file",
    172                     orderfile,
    173                     "-Wl,--no-warn-symbol-ordering",
    174                 ]
    175 
    176             if use_ldflags:
    177                 log.info("Activating PGO-based orderfile")
    178 
    179         gen_cflags = [prefix + "-fprofile-generate"]
    180 
    181         if pgo_temporal:
    182             gen_cflags += ["-mllvm", "-pgo-temporal-instrumentation"]
    183 
    184         if target_is_windows:
    185             # native llvm-profdata.exe on Windows can't read profile data
    186             # if name compression is enabled (which cross-compiling enables
    187             # by default)
    188             gen_cflags += ["-mllvm", "-enable-name-compression=false"]
    189 
    190         return namespace(
    191             gen_cflags=gen_cflags,
    192             gen_ldflags=gen_ldflags,
    193             use_cflags=[
    194                 prefix + "-fprofile-use=%s" % profdata,
    195                 # Some error messages about mismatched profile data
    196                 # come in via -Wbackend-plugin, so disable those too.
    197                 "-Wno-error=backend-plugin",
    198             ],
    199             use_ldflags=use_ldflags,
    200         )
    201 
    202 
    203 set_config("PROFILE_GEN_CFLAGS", pgo_flags.gen_cflags)
    204 set_config("PROFILE_GEN_LDFLAGS", pgo_flags.gen_ldflags)
    205 set_config("PROFILE_USE_CFLAGS", pgo_flags.use_cflags)
    206 set_config("PROFILE_USE_LDFLAGS", pgo_flags.use_ldflags)
    207 
    208 option(
    209     "--with-pgo-jarlog",
    210     help="Use the provided jarlog file when packaging during a profile-use " "build",
    211     nargs=1,
    212 )
    213 
    214 set_config("PGO_JARLOG_PATH", depends_if("--with-pgo-jarlog")(lambda p: p))
    215 
    216 
    217 @depends("MOZ_PGO", "--enable-profile-use", "--enable-profile-generate", c_compiler)
    218 def moz_pgo_rust(pgo, profile_use, profile_generate, c_compiler):
    219     if not pgo:
    220         return
    221 
    222     # Enabling PGO through MOZ_PGO only and not --enable* flags.
    223     if not profile_use and not profile_generate:
    224         return
    225 
    226     if profile_use and profile_generate:
    227         die("Cannot build with --enable-profile-use and --enable-profile-generate.")
    228 
    229     want_cross = (len(profile_use) and profile_use[0] == "cross") or (
    230         len(profile_generate) and profile_generate[0] == "cross"
    231     )
    232 
    233     if not want_cross:
    234         return
    235 
    236     if c_compiler.type == "gcc":
    237         die("Cannot use cross-language PGO with GCC.")
    238 
    239     return True
    240 
    241 
    242 set_config("MOZ_PGO_RUST", moz_pgo_rust)
    243 
    244 # LTO
    245 # ==============================================================
    246 
    247 option(
    248     "--enable-lto",
    249     env="MOZ_LTO",
    250     nargs="*",
    251     choices=("full", "thin", "cross"),
    252     help="Enable LTO",
    253 )
    254 
    255 option(
    256     env="MOZ_LD64_KNOWN_GOOD",
    257     nargs=1,
    258     help="Indicate that ld64 is free of symbol aliasing bugs",
    259 )
    260 
    261 imply_option("MOZ_LD64_KNOWN_GOOD", moz_automation)
    262 
    263 use_fat_lto = cxx_compiler.try_link(
    264     ldflags=depends(stlport_libs)(
    265         lambda extra: ["-flto", "-ffat-lto-objects"] + (extra or [])
    266     ),
    267     check_msg="whether the C++ compiler supports fat lto objects",
    268     when=depends(select_linker, "--enable-lto", enable_tests)(
    269         lambda linker, lto, tests: linker and linker.KIND == "lld" and lto and tests
    270     ),
    271 )
    272 
    273 
    274 @depends(
    275     "--enable-lto",
    276     c_compiler,
    277     select_linker,
    278     "MOZ_LD64_KNOWN_GOOD",
    279     target,
    280     "--enable-profile-generate",
    281     pass_manager.enabled,
    282     "--enable-profile-use",
    283     moz_automation,
    284     use_fat_lto,
    285 )
    286 @imports("multiprocessing")
    287 def lto(
    288     values,
    289     c_compiler,
    290     select_linker,
    291     ld64_known_good,
    292     target,
    293     instrumented_build,
    294     pass_manager,
    295     pgo_build,
    296     moz_automation,
    297     fat,
    298 ):
    299     cflags = []
    300     ldflags = []
    301     enabled = None
    302     rust_lto = False
    303 
    304     if not values:
    305         return
    306 
    307     # Sanitize LTO modes.
    308     if "full" in values and "thin" in values:
    309         die("incompatible --enable-lto choices 'full' and 'thin'")
    310 
    311     # If a value was given to --enable-lto, use that.  Otherwise, make the lto
    312     # mode explicit, using full with gcc, and full or thin with clang depending
    313     # on the performance benefit.
    314     # Defaulting to full LTO is costly in terms of compilation time, so we only
    315     # default to it if MOZ_AUTOMATION and PGO are on, and for some platforms.
    316     # Based on speedometer3 scores, full lto + pgo is beneficial for Linux and
    317     # Windows for x86_64 targets.
    318     if values == () or values == ("cross",):
    319         if c_compiler.type == "gcc":
    320             values += ("full",)
    321         elif (
    322             pgo_build
    323             and moz_automation
    324             and target.os in ("WINNT", "GNU")
    325             and target.cpu == "x86_64"
    326         ):
    327             values += ("full",)
    328         else:
    329             values += ("thin",)
    330 
    331     if instrumented_build:
    332         log.warning("Disabling LTO because --enable-profile-generate is specified")
    333         return
    334 
    335     if c_compiler.type == "gcc":
    336         if "cross" in values:
    337             die("Cross-language LTO is not supported with GCC.")
    338         if "thin" in values:
    339             die(
    340                 "gcc does not support thin LTO. Use `--enable-lto` "
    341                 "to enable full LTO for gcc."
    342             )
    343 
    344     if (
    345         target.kernel == "Darwin"
    346         and "cross" in values
    347         and select_linker.KIND == "ld64"
    348         and not ld64_known_good
    349     ):
    350         die(
    351             "The Mac linker is known to have a bug that affects cross-language "
    352             "LTO.  If you know that your linker is free from this bug, please "
    353             "set the environment variable `MOZ_LD64_KNOWN_GOOD=1` and re-run "
    354             "configure."
    355         )
    356 
    357     if c_compiler.type == "clang":
    358         if "full" in values:
    359             cflags.append("-flto")
    360             ldflags.append("-flto")
    361         else:
    362             cflags.append("-flto=thin")
    363             ldflags.append("-flto=thin")
    364 
    365         if target.os == "Android" and "cross" in values:
    366             # Work around https://github.com/rust-lang/rust/issues/90088
    367             # by enabling the highest level of SSE the rust targets default
    368             # to.
    369             # https://github.com/rust-lang/rust/blob/bdfcb88e8b6203ccb46a2fb6649979b773efc8ac/compiler/rustc_target/src/spec/i686_linux_android.rs#L13
    370             # https://github.com/rust-lang/rust/blob/8d1083e319841624f64400e1524805a40d725439/compiler/rustc_target/src/spec/x86_64_linux_android.rs#L7
    371             if target.cpu == "x86":
    372                 ldflags.append("-Wl,-plugin-opt=-mattr=+ssse3")
    373             elif target.cpu == "x86_64":
    374                 ldflags.append("-Wl,-plugin-opt=-mattr=+sse4.2")
    375     elif c_compiler.type == "clang-cl":
    376         if "full" in values:
    377             cflags.append("-flto")
    378         else:
    379             cflags.append("-flto=thin")
    380         # With clang-cl, -flto can only be used with -c or -fuse-ld=lld.
    381         # AC_TRY_LINKs during configure don't have -c, so pass -fuse-ld=lld.
    382         cflags.append("-fuse-ld=lld")
    383 
    384         # Explicitly set the CPU to optimize for so the linker doesn't
    385         # choose a poor default.  Rust compilation by default uses the
    386         # pentium4 CPU on x86:
    387         #
    388         # https://github.com/rust-lang/rust/blob/049a49b91151a88c95fa0d62a53fd0a0ac2c3af9/compiler/rustc_target/src/spec/i686_pc_windows_msvc.rs#L5
    389         #
    390         # which specifically supports "long" (multi-byte) nops.  See
    391         # https://bugzilla.mozilla.org/show_bug.cgi?id=1568450#c8 for details.
    392         #
    393         # The pentium4 seems like kind of a weird CPU to optimize for, but
    394         # it seems to have worked out OK thus far.  LLVM does not seem to
    395         # specifically schedule code for the pentium4's deep pipeline, so
    396         # that probably contributes to it being an OK default for our
    397         # purposes.
    398         if target.cpu == "x86":
    399             ldflags.append("-mllvm:-mcpu=pentium4")
    400         # This is also the CPU that Rust uses.  The LLVM source code
    401         # recommends this as the "generic 64-bit specific x86 processor model":
    402         #
    403         # https://github.com/llvm/llvm-project/blob/e7694f34ab6a12b8bb480cbfcb396d0a64fe965f/llvm/lib/Target/X86/X86.td#L1165-L1187
    404         if target.cpu == "x86_64":
    405             ldflags.append("-mllvm:-mcpu=x86-64")
    406         # We do not need special flags for arm64.  Hooray for fixed-length
    407         # instruction sets.
    408     else:
    409         num_cores = multiprocessing.cpu_count()
    410         cflags.append("-flto")
    411         cflags.append("-flifetime-dse=1")
    412 
    413         ldflags.append("-flto=%s" % num_cores)
    414         ldflags.append("-flifetime-dse=1")
    415 
    416     # Tell LTO not to inline functions above a certain size, to mitigate
    417     # binary size growth while still getting good performance.
    418     # (For hot functions, PGO will put a multiplier on this limit.)
    419     if c_compiler.type == "clang-cl":
    420         ldflags.append("-mllvm:-import-instr-limit=10")
    421     elif target.kernel == "Darwin":
    422         ldflags.append("-Wl,-mllvm,-import-instr-limit=10")
    423     elif c_compiler.type == "clang":
    424         ldflags.append("-Wl,-plugin-opt=-import-instr-limit=10")
    425 
    426     # If we're using the new pass manager, we can also enable the new PM
    427     # during LTO. Further we can use the resulting size savings to increase
    428     # the import limit in hot functions.
    429     if pass_manager:
    430         if c_compiler.type == "clang-cl":
    431             if c_compiler.version >= "12.0.0" and c_compiler.version < "13.0.0":
    432                 ldflags.append("-opt:ltonewpassmanager")
    433             if c_compiler.version >= "12.0.0":
    434                 ldflags.append("-mllvm:-import-hot-multiplier=30")
    435         elif target.kernel == "Darwin":
    436             ldflags.append("-Wl,-mllvm,-import-hot-multiplier=30")
    437         else:
    438             if c_compiler.version < "13.0.0":
    439                 ldflags.append("-Wl,-plugin-opt=new-pass-manager")
    440             ldflags.append("-Wl,-plugin-opt=-import-hot-multiplier=30")
    441 
    442     # Pick Rust LTO mode in case of cross lTO. Thin is the default.
    443     if "cross" in values:
    444         rust_lto = "full" if "full" in values else "thin"
    445     else:
    446         rust_lto = ""
    447 
    448     if fat:
    449         cflags.append("-ffat-lto-objects")
    450         ldflags.append("-ffat-lto-objects")
    451 
    452     return namespace(
    453         enabled=True,
    454         fat=fat,
    455         cflags=cflags,
    456         ldflags=ldflags,
    457         rust_lto=rust_lto,
    458     )
    459 
    460 
    461 @depends(
    462     dso_flags,
    463     when=building_with_gnu_compatible_cc
    464     & gcc_use_gnu_ld
    465     & ~developer_options
    466     & ~enable_profile_generate,
    467 )
    468 def remove_dead_symbols(dso_flags):
    469     dso_flags.ldopts.append("-Wl,--gc-sections")
    470 
    471 
    472 set_config("MOZ_LTO", lto.enabled)
    473 set_define("MOZ_LTO", lto.enabled)
    474 set_config("MOZ_LTO_CFLAGS", lto.cflags)
    475 set_config("MOZ_LTO_LDFLAGS", lto.ldflags)
    476 set_config("MOZ_LTO_RUST_CROSS", lto.rust_lto)
    477 set_config("MOZ_LTO_FAT", lto.fat)