icu_sources_data.py (11810B)
#!/usr/bin/env python
#
# Any copyright is dedicated to the Public Domain.
# http://creativecommons.org/publicdomain/zero/1.0/
#
# Generate SOURCES in sources.mozbuild files from ICU's Makefile.in
# files, and also build a standalone copy of ICU using its build
# system to generate a new copy of the in-tree ICU data file.
#
# This script expects to be run from `update-icu.sh` after the in-tree
# copy of ICU has been updated.

import glob
import os
import shutil
import subprocess
import sys
import tempfile

from mozbuild.util import cpu_count
from mozpack import path as mozpath

# The following files have been determined to be dead/unused by a
# semi-automated analysis. If you need one of them, just remove it from
# the list below. However, files marked with a "Cluster" comment
# can only be removed together, as they have (directional) dependencies.
# If you want to rerun this analysis, contact :decoder.
UNUSED_SOURCES = set(
    [
        "intl/icu/source/common/bytestrieiterator.cpp",
        "intl/icu/source/common/cstr.cpp",
        "intl/icu/source/common/cwchar.cpp",
        "intl/icu/source/common/icudataver.cpp",
        "intl/icu/source/common/icuplug.cpp",
        "intl/icu/source/common/pluralmap.cpp",
        "intl/icu/source/common/ucat.cpp",
        "intl/icu/source/common/ucnv2022.cpp",
        "intl/icu/source/common/ucnv_ct.cpp",
        "intl/icu/source/common/ucnvdisp.cpp",
        "intl/icu/source/common/ucnv_ext.cpp",
        "intl/icu/source/common/ucnvhz.cpp",
        "intl/icu/source/common/ucnvisci.cpp",
        "intl/icu/source/common/ucnv_lmb.cpp",
        "intl/icu/source/common/ucnvmbcs.cpp",
        "intl/icu/source/common/uidna.cpp",
        "intl/icu/source/common/unorm.cpp",
        "intl/icu/source/common/usc_impl.cpp",
        "intl/icu/source/common/ustr_wcs.cpp",
        "intl/icu/source/i18n/anytrans.cpp",
        "intl/icu/source/i18n/brktrans.cpp",
        "intl/icu/source/i18n/casetrn.cpp",
        "intl/icu/source/i18n/cpdtrans.cpp",
        "intl/icu/source/i18n/esctrn.cpp",
        "intl/icu/source/i18n/fmtable_cnv.cpp",
        "intl/icu/source/i18n/funcrepl.cpp",
        "intl/icu/source/i18n/gender.cpp",
        "intl/icu/source/i18n/name2uni.cpp",
        "intl/icu/source/i18n/nortrans.cpp",
        "intl/icu/source/i18n/nultrans.cpp",
        "intl/icu/source/i18n/quant.cpp",
        "intl/icu/source/i18n/rbt.cpp",
        "intl/icu/source/i18n/rbt_data.cpp",
        "intl/icu/source/i18n/rbt_pars.cpp",
        "intl/icu/source/i18n/rbt_rule.cpp",
        "intl/icu/source/i18n/rbt_set.cpp",
        "intl/icu/source/i18n/regexcmp.cpp",
        "intl/icu/source/i18n/regeximp.cpp",
        "intl/icu/source/i18n/regexst.cpp",
        "intl/icu/source/i18n/regextxt.cpp",
        "intl/icu/source/i18n/rematch.cpp",
        "intl/icu/source/i18n/remtrans.cpp",
        "intl/icu/source/i18n/repattrn.cpp",
        "intl/icu/source/i18n/scientificnumberformatter.cpp",
        "intl/icu/source/i18n/strmatch.cpp",
        "intl/icu/source/i18n/strrepl.cpp",
        "intl/icu/source/i18n/titletrn.cpp",
        "intl/icu/source/i18n/tolowtrn.cpp",
        "intl/icu/source/i18n/toupptrn.cpp",
        "intl/icu/source/i18n/translit.cpp",
        "intl/icu/source/i18n/transreg.cpp",
        "intl/icu/source/i18n/tridpars.cpp",
        "intl/icu/source/i18n/unesctrn.cpp",
        "intl/icu/source/i18n/uni2name.cpp",
        "intl/icu/source/i18n/uregexc.cpp",
        "intl/icu/source/i18n/uregex.cpp",
        "intl/icu/source/i18n/uregion.cpp",
        "intl/icu/source/i18n/uspoof_build.cpp",
        "intl/icu/source/i18n/uspoof_conf.cpp",
        "intl/icu/source/i18n/utrans.cpp",
        "intl/icu/source/i18n/vzone.cpp",
        "intl/icu/source/i18n/zrule.cpp",
        "intl/icu/source/i18n/ztrans.cpp",
        # Cluster
        "intl/icu/source/common/resbund_cnv.cpp",
        "intl/icu/source/common/ures_cnv.cpp",
        # Cluster
        "intl/icu/source/common/propsvec.cpp",
        "intl/icu/source/common/ucnvsel.cpp",
        "intl/icu/source/common/ucnv_set.cpp",
        # Cluster
        "intl/icu/source/common/ubiditransform.cpp",
        "intl/icu/source/common/ushape.cpp",
        # Cluster
        "intl/icu/source/i18n/csdetect.cpp",
        "intl/icu/source/i18n/csmatch.cpp",
        "intl/icu/source/i18n/csr2022.cpp",
        "intl/icu/source/i18n/csrecog.cpp",
        "intl/icu/source/i18n/csrmbcs.cpp",
        "intl/icu/source/i18n/csrsbcs.cpp",
        "intl/icu/source/i18n/csrucode.cpp",
        "intl/icu/source/i18n/csrutf8.cpp",
        "intl/icu/source/i18n/inputext.cpp",
        "intl/icu/source/i18n/ucsdet.cpp",
        # Cluster
        "intl/icu/source/i18n/alphaindex.cpp",
        "intl/icu/source/i18n/ulocdata.cpp",
    ]
    +
    # We use the version of double-conversion vendored in mfbt instead
    [
        "intl/icu/source/i18n/double-conversion-bignum-dtoa.cpp",
        "intl/icu/source/i18n/double-conversion-bignum-dtoa.h",
        "intl/icu/source/i18n/double-conversion-bignum.cpp",
        "intl/icu/source/i18n/double-conversion-bignum.h",
        "intl/icu/source/i18n/double-conversion-cached-powers.cpp",
        "intl/icu/source/i18n/double-conversion-cached-powers.h",
        "intl/icu/source/i18n/double-conversion-diy-fp.h",
        "intl/icu/source/i18n/double-conversion-double-to-string.cpp",
        "intl/icu/source/i18n/double-conversion-double-to-string.h",
        "intl/icu/source/i18n/double-conversion-fast-dtoa.cpp",
        "intl/icu/source/i18n/double-conversion-fast-dtoa.h",
        "intl/icu/source/i18n/double-conversion-ieee.h",
        "intl/icu/source/i18n/double-conversion-string-to-double.cpp",
        "intl/icu/source/i18n/double-conversion-string-to-double.h",
        "intl/icu/source/i18n/double-conversion-strtod.cpp",
        "intl/icu/source/i18n/double-conversion-strtod.h",
        "intl/icu/source/i18n/double-conversion-utils.h",
        "intl/icu/source/i18n/double-conversion.h",
    ]
)


def ensure_source_file_exists(dir, filename):
    """Return the path of `filename` inside `dir`.

    Raises Exception if the joined path is not an existing regular file.
    """
    f = mozpath.join(dir, filename)
    if os.path.isfile(f):
        return f
    raise Exception("Couldn't find source file for: %s" % filename)


def get_sources(sources_file):
    """Return the paths of the files listed in `sources_file`.

    `sources_file` holds one file name per line, relative to the directory
    that contains it. Blank lines are ignored. The result is sorted
    case-insensitively. Raises Exception (via ensure_source_file_exists)
    when a listed file does not exist.
    """
    srcdir = os.path.dirname(sources_file)
    with open(sources_file) as f:
        names = [line.strip() for line in f]
    return sorted(
        # Skip blank lines: an empty name would otherwise be looked up as a
        # file and abort the run with an uninformative error.
        (ensure_source_file_exists(srcdir, name) for name in names if name),
        key=lambda x: x.lower(),
    )


def list_headers(path):
    """Return the regular files directly inside `path` (not recursive),
    sorted case-insensitively."""
    result = []
    for name in os.listdir(path):
        f = mozpath.join(path, name)
        if os.path.isfile(f):
            result.append(f)
    return sorted(result, key=lambda x: x.lower())


def write_sources(mozbuild, sources, headers):
    """Write the generated `mozbuild` file.

    `sources` is split into a `sources` list (entries not in UNUSED_SOURCES)
    and an `other_sources` list (entries in UNUSED_SOURCES); `headers` is
    written as `EXPORTS.unicode`. A list that is empty or None is omitted
    from the output entirely.
    """
    with open(mozbuild, "w", newline="\n", encoding="utf-8") as f:
        f.write(
            "# THIS FILE IS GENERATED BY /intl/icu_sources_data.py " + "DO NOT EDIT\n"
        )

        def write_list(name, content):
            if content:
                # All-lowercase names are assigned with "="; any other name
                # (here EXPORTS.unicode) is appended to with "+=".
                f.write("%s %s [\n" % (name, "=" if name.islower() else "+="))
                # NOTE(review): this file was reconstructed from a
                # whitespace-collapsed copy; confirm the indent inside the
                # entry literal below against the pristine file.
                f.write("".join(' "/%s",\n' % s for s in content))
                f.write("]\n")

        write_list("sources", [s for s in sources if s not in UNUSED_SOURCES])
        write_list("other_sources", [s for s in sources if s in UNUSED_SOURCES])
        write_list("EXPORTS.unicode", headers)


def update_sources(topsrcdir):
    """Regenerate the sources.mozbuild file of each ICU component.

    For each of common, i18n, tools/toolutil and tools/icupkg, reads
    intl/icu/source/<component>/sources.txt and writes
    config/external/icu/<basename of component>/sources.mozbuild. Headers
    are taken from the component's `unicode` subdirectory when it exists.
    """
    print("Updating ICU sources lists...")
    for d in ["common", "i18n", "tools/toolutil", "tools/icupkg"]:
        base_path = mozpath.join(topsrcdir, "intl/icu/source/%s" % d)
        sources_file = mozpath.join(base_path, "sources.txt")
        mozbuild = mozpath.join(
            topsrcdir, "config/external/icu/%s/sources.mozbuild" % mozpath.basename(d)
        )
        sources = [mozpath.relpath(s, topsrcdir) for s in get_sources(sources_file)]
        unicode_dir = mozpath.join(base_path, "unicode")
        if os.path.exists(unicode_dir):
            headers = [
                mozpath.normsep(os.path.relpath(s, topsrcdir))
                for s in list_headers(unicode_dir)
            ]
        else:
            # write_sources omits the EXPORTS.unicode list when given None.
            headers = None
        write_sources(mozbuild, sources, headers)


def try_run(name, command, cwd=None, **kwargs):
    """Run `command`, capturing stdout and stderr in a temporary log file.

    `name` is used as the prefix of the log file name; extra keyword
    arguments are forwarded to subprocess.check_call.

    Returns True when the command exits successfully; the log is then
    deleted. Returns False when the command exits with a non-zero status or
    cannot be started at all; the log is then kept and its path is printed
    to stderr.
    """
    # delete=False: the log has to outlive the `with` block so it can be
    # inspected after a failure. It is removed explicitly on success.
    with tempfile.NamedTemporaryFile(prefix=name, delete=False) as log:
        try:
            subprocess.check_call(
                command, cwd=cwd, stdout=log, stderr=subprocess.STDOUT, **kwargs
            )
        except (subprocess.CalledProcessError, OSError) as error:
            # OSError covers a command that could not be launched (missing
            # executable, bad cwd); treat it like a failed run instead of
            # escaping with a traceback and an orphaned log file.
            failure = error
        else:
            failure = None

    if failure is None:
        os.unlink(log.name)
        return True

    # NOTE(review): this file was reconstructed from a whitespace-collapsed
    # copy; the indent of the second message line is assumed - confirm
    # against the pristine file.
    print(
        """Error running "{}" in directory {}
    See output in {}""".format(" ".join(command), cwd, log.name),
        file=sys.stderr,
    )
    if isinstance(failure, OSError):
        # Nothing ran, so the log is empty; report the reason directly.
        print("    %s" % failure, file=sys.stderr)
    return False


def get_data_file(data_dir):
    """Return the path of an icudt*.dat file in `data_dir`, or None.

    When several files match, the first one in sorted order is returned so
    that the choice does not depend on directory listing order.
    """
    files = sorted(glob.glob(mozpath.join(data_dir, "icudt*.dat")))
    return files[0] if files else None


def update_data_file(topsrcdir):
    """Build ICU out of tree and copy the new data file into the tree.

    Runs ICU's configure and make in a fresh temporary directory, then
    copies the resulting icudt*.dat into config/external/icu/data/. The old
    in-tree data file is deleted first when its name differs from the new
    one.

    Returns True on success. Returns False when configure or make fails or
    when either data file cannot be found; in those cases the temporary
    build directory is left in place.
    """
    objdir = tempfile.mkdtemp(prefix="icu-obj-")
    configure = mozpath.join(topsrcdir, "intl/icu/source/configure")
    env = dict(os.environ)
    # bug 1262101 - these should be shared with the moz.build files
    env.update({
        "CPPFLAGS": (
            "-DU_NO_DEFAULT_INCLUDE_UTF_HEADERS=1 "
            + "-DU_HIDE_OBSOLETE_UTF_OLD_H=1 "
            + "-DUCONFIG_NO_LEGACY_CONVERSION "
            + "-DUCONFIG_NO_TRANSLITERATION "
            + "-DUCONFIG_NO_REGULAR_EXPRESSIONS "
            + "-DUCONFIG_NO_BREAK_ITERATION "
            + "-DUCONFIG_NO_IDNA "
            + "-DUCONFIG_NO_MF2 "
            + "-DU_CHARSET_IS_UTF8 "
        )
    })

    # Exclude data that we currently don't need.
    #
    # The file format for ICU's data build tool is described at
    # <https://github.com/unicode-org/icu/blob/master/docs/userguide/icu_data/buildtool.md>.
    env["ICU_DATA_FILTER_FILE"] = mozpath.join(topsrcdir, "intl/icu/data_filter.json")

    print("Running ICU configure...")
    if not try_run(
        "icu-configure",
        [
            "sh",
            configure,
            "--with-data-packaging=archive",
            "--enable-static",
            "--disable-shared",
            "--disable-extras",
            "--disable-icuio",
            "--disable-layout",
            "--disable-layoutex",
            "--disable-tests",
            "--disable-samples",
            "--disable-strict",
        ],
        cwd=objdir,
        env=env,
    ):
        return False
    print("Running ICU make...")
    if not try_run(
        "icu-make",
        ["make", "--jobs=%d" % cpu_count(), "--output-sync"],
        cwd=objdir,
    ):
        return False
    print("Copying ICU data file...")
    tree_data_path = mozpath.join(topsrcdir, "config/external/icu/data/")
    old_data_file = get_data_file(tree_data_path)
    if not old_data_file:
        print("Error: no ICU data file in %s" % tree_data_path, file=sys.stderr)
        return False
    new_data_file = get_data_file(mozpath.join(objdir, "data/out"))
    if not new_data_file:
        print("Error: no ICU data in ICU objdir", file=sys.stderr)
        return False
    if os.path.basename(old_data_file) != os.path.basename(new_data_file):
        # Data file name has the major version number embedded.
        os.unlink(old_data_file)
    shutil.copy(new_data_file, tree_data_path)
    # Cleanup is best-effort: the data file is already in place, so a
    # failure to remove the build directory only produces a warning.
    try:
        shutil.rmtree(objdir)
    except Exception:
        print("Warning: failed to remove %s" % objdir, file=sys.stderr)
    return True


def main():
    """Command-line entry point: icu_sources_data.py <mozilla topsrcdir>.

    Regenerates the sources lists, then rebuilds the ICU data file. Exits
    with status 1 on a usage error or when the data file update fails.
    """
    if len(sys.argv) != 2:
        print("Usage: icu_sources_data.py <mozilla topsrcdir>", file=sys.stderr)
        sys.exit(1)

    topsrcdir = mozpath.abspath(sys.argv[1])
    update_sources(topsrcdir)
    if not update_data_file(topsrcdir):
        print("Error updating ICU data file", file=sys.stderr)
        sys.exit(1)


if __name__ == "__main__":
    main()