mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-12-02 18:08:58 +00:00
259f753603
Update script and patches for ICU4X 1.5 Differential Revision: https://phabricator.services.mozilla.com/D213006
129 lines
4.5 KiB
Bash
Executable File
129 lines
4.5 KiB
Bash
Executable File
#!/bin/sh
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
set -e
|
|
|
|
# Update the icu4x binary data for a given release:
|
|
# Usage: update-icu4x.sh <URL of ICU GIT> <release tag name> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
|
|
# update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.5.0 45.0.0 release-75-1 1.5.0
|
|
#
|
|
# Update to the main branch:
|
|
# Usage: update-icu4x.sh <URL of ICU GIT> <branch> <CLDR version> <ICU release tag name> <ICU4X version of icu_capi>
|
|
# update-icu4x.sh https://github.com/unicode-org/icu4x.git main 45.0.0 release-75-1 1.5.0
|
|
|
|
# default
|
|
cldr=${3:-45.0.0}
|
|
icuexport=${4:-release-75-1}
|
|
icu4x_version=${5:-1.5.0}
|
|
|
|
if [ $# -lt 2 ]; then
|
|
echo "Usage: update-icu4x.sh <URL of ICU4X GIT> <ICU4X release tag name> <CLDR version> <ICU release tag name> <ICU4X version for icu_capi>"
|
|
echo "Example: update-icu4x.sh https://github.com/unicode-org/icu4x.git icu@1.5.0 45.0.0 release-75-1 1.5.0"
|
|
exit 1
|
|
fi
|
|
|
|
# Make a log function so the output is easy to read.
|
|
log() {
|
|
CYAN='\033[0;36m'
|
|
CLEAR='\033[0m'
|
|
printf "${CYAN}[update-icu4x]${CLEAR} $*\n"
|
|
}
|
|
|
|
# Specify locale and time zone information for consistent output and reproduceability.
|
|
export TZ=UTC
|
|
export LANG=en_US.UTF-8
|
|
export LANGUAGE=en_US
|
|
export LC_ALL=en_US.UTF-8
|
|
|
|
# Define all of the paths.
|
|
original_pwd=$(pwd)
|
|
top_src_dir=$(cd -- "$(dirname "$0")/.." >/dev/null 2>&1 ; pwd -P)
|
|
segmenter_data_dir=${top_src_dir}/intl/icu_segmenter_data/data
|
|
git_info_file=${segmenter_data_dir}/ICU4X-GIT-INFO
|
|
|
|
log "Remove the old data"
|
|
rm -rf ${segmenter_data_dir}
|
|
|
|
log "Download icuexportdata"
|
|
tmpicuexportdir=$(mktemp -d)
|
|
icuexport_filename=`echo "icuexportdata_${icuexport}.zip" | sed "s/\//-/g"`
|
|
cd ${tmpicuexportdir}
|
|
wget https://github.com/unicode-org/icu/releases/download/${icuexport}/${icuexport_filename}
|
|
|
|
log "Patching icuexportdata to reduce data size"
|
|
unzip ${icuexport_filename}
|
|
for toml in \
|
|
burmesedict.toml \
|
|
khmerdict.toml \
|
|
laodict.toml \
|
|
thaidict.toml \
|
|
; do
|
|
cp ${top_src_dir}/intl/icu4x-patches/empty.toml ${tmpicuexportdir}/segmenter/dictionary/$toml
|
|
done
|
|
|
|
log "Clone ICU4X"
|
|
tmpclonedir=$(mktemp -d)
|
|
git clone --depth 1 --branch $2 $1 ${tmpclonedir}
|
|
|
|
log "Change the directory to the cloned repo"
|
|
log ${tmpclonedir}
|
|
cd ${tmpclonedir}
|
|
|
|
log "Copy icu_capi crate to local since we need a patched version"
|
|
rm -rf ${top_src_dir}/intl/icu_capi
|
|
wget -O icu_capi.tar.gz https://crates.io/api/v1/crates/icu_capi/${icu4x_version}/download
|
|
tar xf icu_capi.tar.gz -C ${top_src_dir}/intl
|
|
mv ${top_src_dir}/intl/icu_capi-${icu4x_version} ${top_src_dir}/intl/icu_capi
|
|
rm -rf icu_capi_tar.gz
|
|
|
|
log "Patching icu_capi"
|
|
for patch in \
|
|
001-Cargo.toml.patch \
|
|
002-dead-code.patch \
|
|
; do
|
|
patch -d ${top_src_dir} -p1 --no-backup-if-mismatch < ${top_src_dir}/intl/icu4x-patches/$patch
|
|
done
|
|
|
|
# ICU4X 1.3 or later with icu_capi uses each compiled_data crate.
|
|
|
|
log "Run the icu4x-datagen tool to regenerate the segmenter data."
|
|
log "Saving the data into: ${segmenter_data_dir}"
|
|
|
|
# TODO(Bug 1741262) - Should locales be filtered as well? It doesn't appear that the existing ICU
|
|
# data builder is using any locale filtering.
|
|
|
|
# --keys <KEYS>...
|
|
# Include this resource key in the output. Accepts multiple arguments.
|
|
# --key-file <KEY_FILE>
|
|
# Path to text file with resource keys to include, one per line. Empty lines and
|
|
# lines starting with '#' are ignored.
|
|
cargo run --bin icu4x-datagen \
|
|
--features=bin \
|
|
-- \
|
|
--cldr-tag ${cldr} \
|
|
--icuexport-root ${tmpicuexportdir} \
|
|
--keys segmenter/dictionary/w_auto@1 \
|
|
--keys segmenter/dictionary/wl_ext@1 \
|
|
--keys segmenter/grapheme@1 \
|
|
--keys segmenter/line@1 \
|
|
--keys segmenter/lstm/wl_auto@1 \
|
|
--keys segmenter/sentence@1 \
|
|
--keys segmenter/word@1 \
|
|
--all-locales \
|
|
--format mod \
|
|
--out ${segmenter_data_dir} \
|
|
|
|
log "Record the current cloned git information to:"
|
|
log ${git_info_file}
|
|
# (This ensures that if ICU modifications are performed properly, it's always
|
|
# possible to run the command at the top of this script and make no changes to
|
|
# the tree.)
|
|
git -C ${tmpclonedir} log -1 > ${git_info_file}
|
|
|
|
log "Clean up the tmp directory"
|
|
cd ${original_pwd}
|
|
rm -rf ${tmpclonedir}
|
|
rm -rf ${tmpicuexportdir}
|