tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

icu_sources_data.py (11810B)


      1 #!/usr/bin/env python
      2 #
      3 # Any copyright is dedicated to the Public Domain.
      4 # http://creativecommons.org/publicdomain/zero/1.0/
      5 #
      6 # Generate SOURCES in sources.mozbuild files from ICU's Makefile.in
      7 # files, and also build a standalone copy of ICU using its build
      8 # system to generate a new copy of the in-tree ICU data file.
      9 #
     10 # This script expects to be run from `update-icu.sh` after the in-tree
     11 # copy of ICU has been updated.
     12 
     13 import glob
     14 import os
     15 import shutil
     16 import subprocess
     17 import sys
     18 import tempfile
     19 
     20 from mozbuild.util import cpu_count
     21 from mozpack import path as mozpath
     22 
     23 # The following files have been determined to be dead/unused by a
     24 # semi-automated analysis. You can just remove any of the files below
     25 # if you need them. However, files marked with a "Cluster" comment
     26 # can only be removed together, as they have (directional) dependencies.
     27 # If you want to rerun this analysis, contact :decoder.
     28 UNUSED_SOURCES = set(
     29    [
     30        "intl/icu/source/common/bytestrieiterator.cpp",
     31        "intl/icu/source/common/cstr.cpp",
     32        "intl/icu/source/common/cwchar.cpp",
     33        "intl/icu/source/common/icudataver.cpp",
     34        "intl/icu/source/common/icuplug.cpp",
     35        "intl/icu/source/common/pluralmap.cpp",
     36        "intl/icu/source/common/ucat.cpp",
     37        "intl/icu/source/common/ucnv2022.cpp",
     38        "intl/icu/source/common/ucnv_ct.cpp",
     39        "intl/icu/source/common/ucnvdisp.cpp",
     40        "intl/icu/source/common/ucnv_ext.cpp",
     41        "intl/icu/source/common/ucnvhz.cpp",
     42        "intl/icu/source/common/ucnvisci.cpp",
     43        "intl/icu/source/common/ucnv_lmb.cpp",
     44        "intl/icu/source/common/ucnvmbcs.cpp",
     45        "intl/icu/source/common/uidna.cpp",
     46        "intl/icu/source/common/unorm.cpp",
     47        "intl/icu/source/common/usc_impl.cpp",
     48        "intl/icu/source/common/ustr_wcs.cpp",
     49        "intl/icu/source/i18n/anytrans.cpp",
     50        "intl/icu/source/i18n/brktrans.cpp",
     51        "intl/icu/source/i18n/casetrn.cpp",
     52        "intl/icu/source/i18n/cpdtrans.cpp",
     53        "intl/icu/source/i18n/esctrn.cpp",
     54        "intl/icu/source/i18n/fmtable_cnv.cpp",
     55        "intl/icu/source/i18n/funcrepl.cpp",
     56        "intl/icu/source/i18n/gender.cpp",
     57        "intl/icu/source/i18n/name2uni.cpp",
     58        "intl/icu/source/i18n/nortrans.cpp",
     59        "intl/icu/source/i18n/nultrans.cpp",
     60        "intl/icu/source/i18n/quant.cpp",
     61        "intl/icu/source/i18n/rbt.cpp",
     62        "intl/icu/source/i18n/rbt_data.cpp",
     63        "intl/icu/source/i18n/rbt_pars.cpp",
     64        "intl/icu/source/i18n/rbt_rule.cpp",
     65        "intl/icu/source/i18n/rbt_set.cpp",
     66        "intl/icu/source/i18n/regexcmp.cpp",
     67        "intl/icu/source/i18n/regeximp.cpp",
     68        "intl/icu/source/i18n/regexst.cpp",
     69        "intl/icu/source/i18n/regextxt.cpp",
     70        "intl/icu/source/i18n/rematch.cpp",
     71        "intl/icu/source/i18n/remtrans.cpp",
     72        "intl/icu/source/i18n/repattrn.cpp",
     73        "intl/icu/source/i18n/scientificnumberformatter.cpp",
     74        "intl/icu/source/i18n/strmatch.cpp",
     75        "intl/icu/source/i18n/strrepl.cpp",
     76        "intl/icu/source/i18n/titletrn.cpp",
     77        "intl/icu/source/i18n/tolowtrn.cpp",
     78        "intl/icu/source/i18n/toupptrn.cpp",
     79        "intl/icu/source/i18n/translit.cpp",
     80        "intl/icu/source/i18n/transreg.cpp",
     81        "intl/icu/source/i18n/tridpars.cpp",
     82        "intl/icu/source/i18n/unesctrn.cpp",
     83        "intl/icu/source/i18n/uni2name.cpp",
     84        "intl/icu/source/i18n/uregexc.cpp",
     85        "intl/icu/source/i18n/uregex.cpp",
     86        "intl/icu/source/i18n/uregion.cpp",
     87        "intl/icu/source/i18n/uspoof_build.cpp",
     88        "intl/icu/source/i18n/uspoof_conf.cpp",
     89        "intl/icu/source/i18n/utrans.cpp",
     90        "intl/icu/source/i18n/vzone.cpp",
     91        "intl/icu/source/i18n/zrule.cpp",
     92        "intl/icu/source/i18n/ztrans.cpp",
     93        # Cluster
     94        "intl/icu/source/common/resbund_cnv.cpp",
     95        "intl/icu/source/common/ures_cnv.cpp",
     96        # Cluster
     97        "intl/icu/source/common/propsvec.cpp",
     98        "intl/icu/source/common/ucnvsel.cpp",
     99        "intl/icu/source/common/ucnv_set.cpp",
    100        # Cluster
    101        "intl/icu/source/common/ubiditransform.cpp",
    102        "intl/icu/source/common/ushape.cpp",
    103        # Cluster
    104        "intl/icu/source/i18n/csdetect.cpp",
    105        "intl/icu/source/i18n/csmatch.cpp",
    106        "intl/icu/source/i18n/csr2022.cpp",
    107        "intl/icu/source/i18n/csrecog.cpp",
    108        "intl/icu/source/i18n/csrmbcs.cpp",
    109        "intl/icu/source/i18n/csrsbcs.cpp",
    110        "intl/icu/source/i18n/csrucode.cpp",
    111        "intl/icu/source/i18n/csrutf8.cpp",
    112        "intl/icu/source/i18n/inputext.cpp",
    113        "intl/icu/source/i18n/ucsdet.cpp",
    114        # Cluster
    115        "intl/icu/source/i18n/alphaindex.cpp",
    116        "intl/icu/source/i18n/ulocdata.cpp",
    117    ]
    118    +
    119    # We use the version of double-conversion vendored in mfbt instead
    120    [
    121        "intl/icu/source/i18n/double-conversion-bignum-dtoa.cpp",
    122        "intl/icu/source/i18n/double-conversion-bignum-dtoa.h",
    123        "intl/icu/source/i18n/double-conversion-bignum.cpp",
    124        "intl/icu/source/i18n/double-conversion-bignum.h",
    125        "intl/icu/source/i18n/double-conversion-cached-powers.cpp",
    126        "intl/icu/source/i18n/double-conversion-cached-powers.h",
    127        "intl/icu/source/i18n/double-conversion-diy-fp.h",
    128        "intl/icu/source/i18n/double-conversion-double-to-string.cpp",
    129        "intl/icu/source/i18n/double-conversion-double-to-string.h",
    130        "intl/icu/source/i18n/double-conversion-fast-dtoa.cpp",
    131        "intl/icu/source/i18n/double-conversion-fast-dtoa.h",
    132        "intl/icu/source/i18n/double-conversion-ieee.h",
    133        "intl/icu/source/i18n/double-conversion-string-to-double.cpp",
    134        "intl/icu/source/i18n/double-conversion-string-to-double.h",
    135        "intl/icu/source/i18n/double-conversion-strtod.cpp",
    136        "intl/icu/source/i18n/double-conversion-strtod.h",
    137        "intl/icu/source/i18n/double-conversion-utils.h",
    138        "intl/icu/source/i18n/double-conversion.h",
    139    ]
    140 )
    141 
    142 
    143 def ensure_source_file_exists(dir, filename):
    144    f = mozpath.join(dir, filename)
    145    if os.path.isfile(f):
    146        return f
    147    raise Exception("Couldn't find source file for: %s" % filename)
    148 
    149 
    150 def get_sources(sources_file):
    151    srcdir = os.path.dirname(sources_file)
    152    with open(sources_file) as f:
    153        return sorted(
    154            (ensure_source_file_exists(srcdir, name.strip()) for name in f),
    155            key=lambda x: x.lower(),
    156        )
    157 
    158 
    159 def list_headers(path):
    160    result = []
    161    for name in os.listdir(path):
    162        f = mozpath.join(path, name)
    163        if os.path.isfile(f):
    164            result.append(f)
    165    return sorted(result, key=lambda x: x.lower())
    166 
    167 
    168 def write_sources(mozbuild, sources, headers):
    169    with open(mozbuild, "w", newline="\n", encoding="utf-8") as f:
    170        f.write(
    171            "# THIS FILE IS GENERATED BY /intl/icu_sources_data.py " + "DO NOT EDIT\n"
    172        )
    173 
    174        def write_list(name, content):
    175            if content:
    176                f.write("%s %s [\n" % (name, "=" if name.islower() else "+="))
    177                f.write("".join('    "/%s",\n' % s for s in content))
    178                f.write("]\n")
    179 
    180        write_list("sources", [s for s in sources if s not in UNUSED_SOURCES])
    181        write_list("other_sources", [s for s in sources if s in UNUSED_SOURCES])
    182        write_list("EXPORTS.unicode", headers)
    183 
    184 
    185 def update_sources(topsrcdir):
    186    print("Updating ICU sources lists...")
    187    for d in ["common", "i18n", "tools/toolutil", "tools/icupkg"]:
    188        base_path = mozpath.join(topsrcdir, "intl/icu/source/%s" % d)
    189        sources_file = mozpath.join(base_path, "sources.txt")
    190        mozbuild = mozpath.join(
    191            topsrcdir, "config/external/icu/%s/sources.mozbuild" % mozpath.basename(d)
    192        )
    193        sources = [mozpath.relpath(s, topsrcdir) for s in get_sources(sources_file)]
    194        unicode_dir = mozpath.join(base_path, "unicode")
    195        if os.path.exists(unicode_dir):
    196            headers = [
    197                mozpath.normsep(os.path.relpath(s, topsrcdir))
    198                for s in list_headers(unicode_dir)
    199            ]
    200        else:
    201            headers = None
    202        write_sources(mozbuild, sources, headers)
    203 
    204 
    205 def try_run(name, command, cwd=None, **kwargs):
    206    try:
    207        with tempfile.NamedTemporaryFile(prefix=name, delete=False) as f:
    208            subprocess.check_call(
    209                command, cwd=cwd, stdout=f, stderr=subprocess.STDOUT, **kwargs
    210            )
    211    except subprocess.CalledProcessError:
    212        print(
    213            """Error running "{}" in directory {}
    214    See output in {}""".format(" ".join(command), cwd, f.name),
    215            file=sys.stderr,
    216        )
    217        return False
    218    else:
    219        os.unlink(f.name)
    220        return True
    221 
    222 
    223 def get_data_file(data_dir):
    224    files = glob.glob(mozpath.join(data_dir, "icudt*.dat"))
    225    return files[0] if files else None
    226 
    227 
    228 def update_data_file(topsrcdir):
    229    objdir = tempfile.mkdtemp(prefix="icu-obj-")
    230    configure = mozpath.join(topsrcdir, "intl/icu/source/configure")
    231    env = dict(os.environ)
    232    # bug 1262101 - these should be shared with the moz.build files
    233    env.update({
    234        "CPPFLAGS": (
    235            "-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 "
    236            + "-DU_HIDE_OBSOLETE_UTF_OLD_H=1 "
    237            + "-DUCONFIG_NO_LEGACY_CONVERSION "
    238            + "-DUCONFIG_NO_TRANSLITERATION "
    239            + "-DUCONFIG_NO_REGULAR_EXPRESSIONS "
    240            + "-DUCONFIG_NO_BREAK_ITERATION "
    241            + "-DUCONFIG_NO_IDNA "
    242            + "-DUCONFIG_NO_MF2 "
    243            + "-DU_CHARSET_IS_UTF8 "
    244        )
    245    })
    246 
    247    # Exclude data that we currently don't need.
    248    #
    249    # The file format for ICU's data build tool is described at
    250    # <https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md>.
    251    env["ICU_DATA_FILTER_FILE"] = mozpath.join(topsrcdir, "intl/icu/data_filter.json")
    252 
    253    print("Running ICU configure...")
    254    if not try_run(
    255        "icu-configure",
    256        [
    257            "sh",
    258            configure,
    259            "--with-data-packaging=archive",
    260            "--enable-static",
    261            "--disable-shared",
    262            "--disable-extras",
    263            "--disable-icuio",
    264            "--disable-layout",
    265            "--disable-layoutex",
    266            "--disable-tests",
    267            "--disable-samples",
    268            "--disable-strict",
    269        ],
    270        cwd=objdir,
    271        env=env,
    272    ):
    273        return False
    274    print("Running ICU make...")
    275    if not try_run(
    276        "icu-make",
    277        ["make", "--jobs=%d" % cpu_count(), "--output-sync"],
    278        cwd=objdir,
    279    ):
    280        return False
    281    print("Copying ICU data file...")
    282    tree_data_path = mozpath.join(topsrcdir, "config/external/icu/data/")
    283    old_data_file = get_data_file(tree_data_path)
    284    if not old_data_file:
    285        print("Error: no ICU data file in %s" % tree_data_path, file=sys.stderr)
    286        return False
    287    new_data_file = get_data_file(mozpath.join(objdir, "data/out"))
    288    if not new_data_file:
    289        print("Error: no ICU data in ICU objdir", file=sys.stderr)
    290        return False
    291    if os.path.basename(old_data_file) != os.path.basename(new_data_file):
    292        # Data file name has the major version number embedded.
    293        os.unlink(old_data_file)
    294    shutil.copy(new_data_file, tree_data_path)
    295    try:
    296        shutil.rmtree(objdir)
    297    except Exception:
    298        print("Warning: failed to remove %s" % objdir, file=sys.stderr)
    299    return True
    300 
    301 
    302 def main():
    303    if len(sys.argv) != 2:
    304        print("Usage: icu_sources_data.py <mozilla topsrcdir>", file=sys.stderr)
    305        sys.exit(1)
    306 
    307    topsrcdir = mozpath.abspath(sys.argv[1])
    308    update_sources(topsrcdir)
    309    if not update_data_file(topsrcdir):
    310        print("Error updating ICU data file", file=sys.stderr)
    311        sys.exit(1)
    312 
    313 
    314 if __name__ == "__main__":
    315    main()