tor-browser

The Tor Browser
git clone https://git.dasho.dev/tor-browser.git
Log | Files | Refs | README | LICENSE

update-icu4x.sh (4591B)


      1 #!/bin/sh
      2 # This Source Code Form is subject to the terms of the Mozilla Public
      3 # License, v. 2.0. If a copy of the MPL was not distributed with this
      4 # file, You can obtain one at http://mozilla.org/MPL/2.0/.
      5 
      6 set -e
      7 
      8 # Update the icu4x binary data for a given release:
      9 #   Usage: update-icu4x.sh <URL of ICU GIT> <release tag name> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
     10 #   update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.5.0 45.0.0 release-75-1 1.5.0
     11 #
     12 # Update to the main branch:
     13 #   Usage: update-icu4x.sh <URL of ICU GIT> <branch> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
     14 #   update-icu4x.sh https://github.com/unicode-org/icu4x.git main 45.0.0 release-75-1 1.5.0
     15 
     16 # default
     17 cldr=${3:-47.0.0}
     18 icuexport=${4:-release-77-1}
     19 icu4x_version=${5:-2.0.0}
     20 
     21 if [ $# -lt 2 ]; then
     22  echo "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name> <ICU4X version for icu_capi>"
     23  echo "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@2.0.0 47.0.0 release-77-1 2.0.0"
     24  exit 1
     25 fi
     26 
     27 # Make a log function so the output is easy to read.
     28 log() {
     29  CYAN='\033[0;36m'
     30  CLEAR='\033[0m'
     31  printf "${CYAN}[update-icu4x]${CLEAR} $*\n"
     32 }
     33 
     34 # Specify locale and time zone information for consistent output and reproduceability.
     35 export TZ=UTC
     36 export LANG=en_US.UTF-8
     37 export LANGUAGE=en_US
     38 export LC_ALL=en_US.UTF-8
     39 
     40 # Define all of the paths.
     41 original_pwd=$(pwd)
     42 top_src_dir=$(cd -- "$(dirname "$0")/.." >/dev/null 2>&1 ; pwd -P)
     43 segmenter_data_dir=${top_src_dir}/intl/icu_segmenter_data/data
     44 git_info_file=${segmenter_data_dir}/ICU4X-GIT-INFO
     45 
     46 log "Remove the old data"
     47 rm -rf ${segmenter_data_dir}
     48 
     49 log "Download icuexportdata"
     50 tmpicuexportdir=$(mktemp -d)
     51 icuexport_filename=`echo "icuexportdata_${icuexport}.zip" | sed "s/\//-/g"`
     52 cd ${tmpicuexportdir}
     53 wget https://github.com/unicode-org/icu/releases/download/${icuexport}/${icuexport_filename}
     54 
     55 log "Patching icuexportdata to reduce data size"
     56 unzip ${icuexport_filename}
     57 for toml in          \
     58    burmesedict.toml \
     59    khmerdict.toml   \
     60    laodict.toml     \
     61    thaidict.toml    \
     62 ; do
     63    cp ${top_src_dir}/intl/icu4x-patches/empty.toml ${tmpicuexportdir}/segmenter/dictionary/$toml
     64 done
     65 
     66 log "Clone ICU4X"
     67 tmpclonedir=$(mktemp -d)
     68 git clone --depth 1 --branch $2 $1 ${tmpclonedir}
     69 
     70 log "Change the directory to the cloned repo"
     71 log ${tmpclonedir}
     72 cd ${tmpclonedir}
     73 
     74 log "Copy icu_capi crate to local since we need a patched version"
     75 rm -rf ${top_src_dir}/intl/icu_capi
     76 wget -O icu_capi.tar.gz https://crates.io/api/v1/crates/icu_capi/${icu4x_version}/download
     77 tar xf icu_capi.tar.gz -C ${top_src_dir}/intl
     78 mv ${top_src_dir}/intl/icu_capi-${icu4x_version} ${top_src_dir}/intl/icu_capi
     79 rm -rf icu_capi_tar.gz
     80 
     81 log "Patching icu_capi"
     82 for patch in \
     83    001-Cargo.toml.patch \
     84 ; do
     85    patch -d ${top_src_dir} -p1 --no-backup-if-mismatch < ${top_src_dir}/intl/icu4x-patches/$patch
     86 done
     87 
     88 # ICU4X 1.3 or later with icu_capi uses each compiled_data crate.
     89 
     90 log "Run the icu4x-datagen tool to regenerate the segmenter data."
     91 log "Saving the data into: ${segmenter_data_dir}"
     92 
     93 # TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
     94 # data builder is using any locale filtering.
     95 
     96 # --keys <KEYS>...
     97 #     Include this resource key in the output. Accepts multiple arguments.
     98 # --key-file <KEY_FILE>
     99 #     Path to text file with resource keys to include, one per line. Empty lines and
    100 #     lines starting with '#' are ignored.
    101 cargo run --bin icu4x-datagen          \
    102  --                                   \
    103  --cldr-tag ${cldr}                   \
    104  --icuexport-root ${tmpicuexportdir}  \
    105  -m SegmenterBreakGraphemeClusterV1   \
    106  -m SegmenterBreakLineV1              \
    107  -m SegmenterBreakSentenceV1          \
    108  -m SegmenterBreakSentenceOverrideV1  \
    109  -m SegmenterBreakWordV1              \
    110  -m SegmenterBreakWordOverrideV1      \
    111  -m SegmenterLstmAutoV1               \
    112  -m SegmenterDictionaryAutoV1         \
    113  -m SegmenterDictionaryExtendedV1     \
    114  --locales full                       \
    115  --format baked                       \
    116  --out ${segmenter_data_dir}          \
    117 
    118 log "Record the current cloned git information to:"
    119 log ${git_info_file}
    120 # (This ensures that if ICU modifications are performed properly, it's always
    121 # possible to run the command at the top of this script and make no changes to
    122 # the tree.)
    123 git -C ${tmpclonedir} log -1 > ${git_info_file}
    124 
    125 log "Clean up the tmp directory"
    126 cd ${original_pwd}
    127 rm -rf ${tmpclonedir}
    128 rm -rf ${tmpicuexportdir}