mirror of
https://gitee.com/openharmony/third_party_rust_regex
synced 2025-04-12 15:43:16 +00:00
53 lines
1.7 KiB
Bash
Executable File
53 lines
1.7 KiB
Bash
Executable File
#!/bin/sh

# This script is responsible for generating some of the Unicode tables used
# in regex-syntax.
#
# Usage is simple, first download the Unicode data:
#
#   $ mkdir ucd
#   $ cd ucd
#   $ curl -LO https://www.unicode.org/Public/zipped/12.1.0/UCD.zip
#   $ unzip UCD.zip
#   $ curl -LO https://unicode.org/Public/emoji/12.0/emoji-data.txt
#
# And then run this script from the root of this repository by pointing it at
# the data directory downloaded above:
#
#   $ ./scripts/generate-unicode-tables path/to/ucd
#
# Exit status is 0 when every table was regenerated and formatted, and
# non-zero on a usage error, a missing prerequisite, or the first command
# that fails.

# Stop at the first failing command and treat unset variables as errors.
# Every table below is written through a shell redirect, so without this a
# failed ucd-generate run would leave an empty or partial .rs file behind
# while the script carried on and still reported success.
set -eu

if [ "$#" -ne 1 ]; then
  echo "Usage: $(basename "$0") <ucd-data-directory>" >&2
  exit 1
fi
ucddir="$1"

# Output directory, relative to the current working directory. This is why
# the script has to be started from the root of the repository.
out="regex-syntax/src/unicode_tables"

# Check the prerequisites up front: the redirects below truncate their
# target files before ucd-generate even starts, so failing late would
# clobber tables that were previously fine.
if [ ! -d "$ucddir" ]; then
  echo "$(basename "$0"): not a directory: $ucddir" >&2
  exit 1
fi
if [ ! -d "$out" ]; then
  echo "$(basename "$0"): $out not found; run from the repository root" >&2
  exit 1
fi
if ! command -v ucd-generate >/dev/null 2>&1; then
  echo "$(basename "$0"): ucd-generate not found in PATH" >&2
  exit 1
fi

# One ucd-generate invocation per table; each one overwrites the matching
# Rust source file under $out.
ucd-generate age "$ucddir" \
  --chars > "$out/age.rs"
ucd-generate case-folding-simple "$ucddir" \
  --chars --all-pairs > "$out/case_folding_simple.rs"
ucd-generate general-category "$ucddir" \
  --chars --exclude surrogate > "$out/general_category.rs"
ucd-generate grapheme-cluster-break "$ucddir" \
  --chars > "$out/grapheme_cluster_break.rs"
ucd-generate perl-word "$ucddir" \
  --chars > "$out/perl_word.rs"
ucd-generate property-bool "$ucddir" \
  --chars > "$out/property_bool.rs"
ucd-generate property-names "$ucddir" \
  > "$out/property_names.rs"
ucd-generate property-values "$ucddir" \
  --include gc,script,scx,age,gcb,wb,sb > "$out/property_values.rs"
ucd-generate script "$ucddir" \
  --chars > "$out/script.rs"
ucd-generate script-extension "$ucddir" \
  --chars > "$out/script_extension.rs"
ucd-generate sentence-break "$ucddir" \
  --chars > "$out/sentence_break.rs"
ucd-generate word-break "$ucddir" \
  --chars > "$out/word_break.rs"

# Make sure everything is formatted.
cargo +stable fmt --all