mirror of
https://gitee.com/openharmony/third_party_icu
synced 2024-11-30 03:40:40 +00:00
141 lines
4.2 KiB
Python
141 lines
4.2 KiB
Python
|
#! /usr/bin/python -B
|
||
|
|
||
|
# Copyright (C) 2016 and later: Unicode, Inc. and others.
|
||
|
# License & terms of use: http://www.unicode.org/copyright.html
|
||
|
|
||
|
# Copyright (C) 2009-2011, International Business Machines Corporation, Google and Others.
|
||
|
# All rights reserved.
|
||
|
|
||
|
#
|
||
|
# Script to check that ICU source files contain only valid UTF-8 encoded text,
|
||
|
# and that all files except '.txt' files do not contain a Byte Order Mark (BOM).
|
||
|
#
|
||
|
# THIS SCRIPT DOES NOT WORK ON WINDOWS
|
||
|
# It only works correctly on platforms where the native line ending is a plain \n
|
||
|
#
|
||
|
# usage:
|
||
|
# icu-file-utf8-check.py [options]
|
||
|
#
|
||
|
# options:
|
||
|
# -h | --help Print a usage line and exit.
|
||
|
#
|
||
|
# The tool operates recursively on the directory from which it is run.
|
||
|
# Only files from the ICU github repository are checked.
|
||
|
#    No changes are made to the repository or to the working copy; files are only read.
|
||
|
# The script checks all source files and returns a non-zero exit code if any of
|
||
|
# the checked files contain a non-UTF-8 character.
|
||
|
|
||
|
from __future__ import print_function
|
||
|
|
||
|
import sys
|
||
|
import os
|
||
|
import os.path
|
||
|
import re
|
||
|
import getopt
|
||
|
|
||
|
|
||
|
# Directories (relative to the current working directory) whose git-tracked
# files are checked for UTF-8 validity and BOMs. "." covers the whole tree
# the script is run from; edit the list to narrow or widen the scan.
icu_directories_to_be_scanned = [
    ".",
]

# Paths, exactly as reported by 'git ls-files', that may legitimately
# contain \r line endings. If this list grows long, consider moving it
# into a separate file.
ignore_cr_in_files = [
    "vendor/double-conversion/upstream/msvc/testrunner.cmd",
]
|
||
|
|
||
|
def runCommand(cmd):
    """Run a shell command and capture its standard output.

    cmd: command line string; it is executed through the shell by os.popen().

    Returns a tuple (output_text, exit_status). exit_status is the value
    returned by closing the pipe: None when the command exited with status
    zero, a non-zero status value otherwise.
    """
    output_file = os.popen(cmd)
    try:
        output_text = output_file.read()
    finally:
        # Always release the pipe and reap the child, even if read() fails.
        exit_status = output_file.close()

    return output_text, exit_status
|
||
|
|
||
|
|
||
|
def usage():
    """Print a one-line usage summary for this script to standard output."""
    program_name = sys.argv[0]
    print("".join(("usage: ", program_name, " [-h | --help]")))
|
||
|
|
||
|
|
||
|
#
|
||
|
# File check. Check source code files for UTF-8 and all except text files for not containing a BOM
|
||
|
# file_name: name of a text file.
|
||
|
# is_source: Flag, set to True if file is a source code file (.c, .cpp, .h, .java).
|
||
|
#
|
||
|
def check_file(file_name, is_source):
    """Check one file for invalid UTF-8 and for a disallowed UTF-8 BOM.

    file_name: path of the file to check; it is read in binary mode.
    is_source: any truthy value if the file is a source code file, in which
               case its contents must decode as UTF-8.

    Returns 0 if the file passes every applicable check, 1 otherwise.
    A message is printed for each problem found.
    """
    rc = 0
    with open(file_name, 'rb') as f:
        data = f.read()

    if is_source:
        try:
            data.decode("UTF-8")
        except UnicodeDecodeError:
            print("Error: %s is a source code file but contains non-utf-8 bytes." % file_name)
            rc = 1

    # Compare against the full three-byte BOM. Testing only the first byte
    # would raise IndexError on an empty file and would wrongly flag any file
    # whose first character merely encodes with a leading 0xEF byte.
    if data.startswith(b"\xef\xbb\xbf"):
        bom_allowed = (file_name.endswith((".txt", ".sln", ".targets"))
                       or ".vcxproj" in file_name)
        if not bom_allowed:
            print("Warning: file %s contains a UTF-8 BOM: " % file_name)
            rc = 1

    return rc
|
||
|
|
||
|
def main(argv):
    """Check every git-tracked file under the configured directories.

    argv: command line arguments, without the program name.

    For each directory in icu_directories_to_be_scanned the file list is
    obtained with 'git ls-files'. Every listed file that exists in the
    working copy is passed to check_file() and then searched for \\r
    characters with grep.

    This function does not return; it always ends the process through
    sys.exit(): 0 when every check passed, 1 when a check failed or the
    file list could not be obtained, 2 on a command line error.
    """
    exit_status = 0

    try:
        opts, args = getopt.getopt(argv, "h", ["help"])
    except getopt.GetoptError as err:
        # Report what getopt actually rejected; it is not necessarily argv[0].
        print(err.msg)
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
    if args:
        print("unexpected command line argument")
        usage()
        sys.exit(2)

    source_file_re = re.compile(".*((?:\\.c$)|(?:\\.cpp$)|(?:\\.h$)|(?:\\.java$))")
    git_cmd = "git ls-files DIR"

    for scan_dir in icu_directories_to_be_scanned:
        print('Scanning ' + scan_dir)
        cmd = git_cmd.replace("DIR", scan_dir)
        output, rc = runCommand(cmd)
        if rc:
            print('"', cmd, '" failed. Exiting.', file=sys.stderr)
            # Without a reliable file list nothing can be verified, so stop
            # with a failing status instead of reporting a false success.
            sys.exit(1)
        file_list = output.splitlines()

        for f in file_list:
            if os.path.isdir(f):
                print("Skipping dir " + f)
                continue
            if not os.path.isfile(f):
                print("Repository file not in working copy: " + f)
                continue

            source_file = source_file_re.match(f)
            if check_file(f, source_file) != 0:
                exit_status = 1

            # Lastly, check the line endings of the file.
            # The name is single-quoted for the shell (embedded quotes
            # escaped) and placed after '--' so that spaces, shell
            # metacharacters or a leading '-' cannot change the command.
            quoted_name = "'" + f.replace("'", "'\\''") + "'"
            grep_output, grep_rc = runCommand("grep -rPIl \"\\r\" -- " + quoted_name)
            # runCommand() yields None for a zero exit status, which grep
            # uses to signal that it found a match and listed the file.
            if grep_rc is None:
                if f not in ignore_cr_in_files:
                    print("File ", f, " has \\r line ending")
                    exit_status = 1

    print(exit_status)
    sys.exit(exit_status)
|
||
|
|
||
|
if __name__ == "__main__":
    # Drop the program name; main() expects only the option arguments.
    cli_args = sys.argv[1:]
    main(cli_args)
|