mirror of
https://github.com/darlinghq/darling-openjdk.git
synced 2024-11-27 14:20:27 +00:00
8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard
Reviewed-by: rriggs
This commit is contained in:
parent
4d70cdac4f
commit
2aac0e925d
@ -115,13 +115,14 @@ class CharacterData00 extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierStart(int ch) {
|
boolean isUnicodeIdentifierStart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierPart(int ch) {
|
boolean isUnicodeIdentifierPart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||||
return ((props & $$maskUnicodePart) != 0);
|
isIdentifierIgnorable(ch) ||
|
||||||
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isIdentifierIgnorable(int ch) {
|
boolean isIdentifierIgnorable(int ch) {
|
||||||
|
@ -114,13 +114,14 @@ class CharacterData01 extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierStart(int ch) {
|
boolean isUnicodeIdentifierStart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierPart(int ch) {
|
boolean isUnicodeIdentifierPart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||||
return ((props & $$maskUnicodePart) != 0);
|
isIdentifierIgnorable(ch) ||
|
||||||
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isIdentifierIgnorable(int ch) {
|
boolean isIdentifierIgnorable(int ch) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -113,13 +113,14 @@ class CharacterData02 extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierStart(int ch) {
|
boolean isUnicodeIdentifierStart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierPart(int ch) {
|
boolean isUnicodeIdentifierPart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||||
return ((props & $$maskUnicodePart) != 0);
|
isIdentifierIgnorable(ch) ||
|
||||||
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isIdentifierIgnorable(int ch) {
|
boolean isIdentifierIgnorable(int ch) {
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -113,15 +113,16 @@ class CharacterData0E extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierStart(int ch) {
|
boolean isUnicodeIdentifierStart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierPart(int ch) {
|
boolean isUnicodeIdentifierPart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||||
return ((props & $$maskUnicodePart) != 0);
|
isIdentifierIgnorable(ch) ||
|
||||||
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isIdentifierIgnorable(int ch) {
|
boolean isIdentifierIgnorable(int ch) {
|
||||||
int props = getProperties(ch);
|
int props = getProperties(ch);
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -133,13 +133,14 @@ class CharacterDataLatin1 extends CharacterData {
|
|||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierStart(int ch) {
|
boolean isUnicodeIdentifierStart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||
|
||||||
return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isUnicodeIdentifierPart(int ch) {
|
boolean isUnicodeIdentifierPart(int ch) {
|
||||||
int props = getProperties(ch);
|
return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
|
||||||
return ((props & $$maskUnicodePart) != 0);
|
isIdentifierIgnorable(ch) ||
|
||||||
|
ch == 0x2E2F;
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean isIdentifierIgnorable(int ch) {
|
boolean isIdentifierIgnorable(int ch) {
|
||||||
|
11885
make/data/unicodedata/DerivedCoreProperties.txt
Normal file
11885
make/data/unicodedata/DerivedCoreProperties.txt
Normal file
File diff suppressed because it is too large
Load Diff
@ -42,6 +42,7 @@ define SetupCharacterData
|
|||||||
-spec $(UNICODEDATA)/UnicodeData.txt \
|
-spec $(UNICODEDATA)/UnicodeData.txt \
|
||||||
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
|
-specialcasing $(UNICODEDATA)/SpecialCasing.txt \
|
||||||
-proplist $(UNICODEDATA)/PropList.txt \
|
-proplist $(UNICODEDATA)/PropList.txt \
|
||||||
|
-derivedprops $(UNICODEDATA)/DerivedCoreProperties.txt \
|
||||||
-o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \
|
-o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \
|
||||||
-usecharforbyte $3
|
-usecharforbyte $3
|
||||||
|
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -73,6 +73,7 @@ public class GenerateCharacter {
|
|||||||
static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt";
|
static String DefaultUnicodeSpecFileName = ROOT + "UnicodeData.txt";
|
||||||
static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
|
static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
|
||||||
static String DefaultPropListFileName = ROOT + "PropList.txt";
|
static String DefaultPropListFileName = ROOT + "PropList.txt";
|
||||||
|
static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt";
|
||||||
static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
|
static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
|
||||||
static String DefaultJavaOutputFileName = ROOT + "Character.java";
|
static String DefaultJavaOutputFileName = ROOT + "Character.java";
|
||||||
static String DefaultCTemplateFileName = ROOT + "Character.c.template";
|
static String DefaultCTemplateFileName = ROOT + "Character.c.template";
|
||||||
@ -159,6 +160,8 @@ public class GenerateCharacter {
|
|||||||
1 bit Other_Math property
|
1 bit Other_Math property
|
||||||
1 bit Ideographic property
|
1 bit Ideographic property
|
||||||
1 bit Noncharacter codepoint property
|
1 bit Noncharacter codepoint property
|
||||||
|
1 bit ID_Start property
|
||||||
|
1 bit ID_Continue property
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
|
||||||
@ -190,7 +193,7 @@ public class GenerateCharacter {
|
|||||||
// maskMirrored needs to be long, if up 16-bit
|
// maskMirrored needs to be long, if up 16-bit
|
||||||
private static final long maskMirrored = 0x80000000L;
|
private static final long maskMirrored = 0x80000000L;
|
||||||
|
|
||||||
// bit masks identify the 16-bit priperty field described above, in B
|
// bit masks identify the 16-bit property field described above, in B
|
||||||
// table
|
// table
|
||||||
private static final long
|
private static final long
|
||||||
maskOtherLowercase = 0x100000000L,
|
maskOtherLowercase = 0x100000000L,
|
||||||
@ -198,7 +201,9 @@ public class GenerateCharacter {
|
|||||||
maskOtherAlphabetic = 0x400000000L,
|
maskOtherAlphabetic = 0x400000000L,
|
||||||
maskOtherMath = 0x800000000L,
|
maskOtherMath = 0x800000000L,
|
||||||
maskIdeographic = 0x1000000000L,
|
maskIdeographic = 0x1000000000L,
|
||||||
maskNoncharacterCP = 0x2000000000L;
|
maskNoncharacterCP = 0x2000000000L,
|
||||||
|
maskIDStart = 0x4000000000L,
|
||||||
|
maskIDContinue = 0x8000000000L;
|
||||||
|
|
||||||
// Can compare masked values with these to determine
|
// Can compare masked values with these to determine
|
||||||
// numeric or lexical types.
|
// numeric or lexical types.
|
||||||
@ -367,6 +372,8 @@ public class GenerateCharacter {
|
|||||||
addExProp(result, propList, "Ideographic", maskIdeographic);
|
addExProp(result, propList, "Ideographic", maskIdeographic);
|
||||||
//addExProp(result, propList, "Other_Math", maskOtherMath);
|
//addExProp(result, propList, "Other_Math", maskOtherMath);
|
||||||
//addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
|
//addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
|
||||||
|
addExProp(result, propList, "ID_Start", maskIDStart);
|
||||||
|
addExProp(result, propList, "ID_Continue", maskIDContinue);
|
||||||
|
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
@ -780,6 +787,8 @@ OUTER: for (int i = 0; i < n; i += m) {
|
|||||||
if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
|
if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
|
||||||
if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
|
if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
|
||||||
if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
|
if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
|
||||||
|
if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32);
|
||||||
|
if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32);
|
||||||
if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
|
if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
|
||||||
if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
|
if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
|
||||||
if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
|
if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
|
||||||
@ -1612,6 +1621,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
|||||||
static String UnicodeSpecFileName = null; // liu
|
static String UnicodeSpecFileName = null; // liu
|
||||||
static String SpecialCasingFileName = null;
|
static String SpecialCasingFileName = null;
|
||||||
static String PropListFileName = null;
|
static String PropListFileName = null;
|
||||||
|
static String DerivedPropsFileName = null;
|
||||||
static boolean useCharForByte = false;
|
static boolean useCharForByte = false;
|
||||||
static int[] sizes;
|
static int[] sizes;
|
||||||
static int bins = 0; // liu; if > 0, then perform search
|
static int bins = 0; // liu; if > 0, then perform search
|
||||||
@ -1739,6 +1749,14 @@ OUTER: for (int i = 0; i < n; i += m) {
|
|||||||
PropListFileName = args[++j];
|
PropListFileName = args[++j];
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else if (args[j].equals("-derivedprops")) {
|
||||||
|
if (j == args.length -1) {
|
||||||
|
FAIL("File name missing after -derivedprops");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
DerivedPropsFileName = args[++j];
|
||||||
|
}
|
||||||
|
}
|
||||||
else if (args[j].equals("-plane")) {
|
else if (args[j].equals("-plane")) {
|
||||||
if (j == args.length -1) {
|
if (j == args.length -1) {
|
||||||
FAIL("Plane number missing after -plane");
|
FAIL("Plane number missing after -plane");
|
||||||
@ -1803,6 +1821,10 @@ OUTER: for (int i = 0; i < n; i += m) {
|
|||||||
PropListFileName = DefaultPropListFileName;
|
PropListFileName = DefaultPropListFileName;
|
||||||
desc.append(" [-proplist " + PropListFileName + ']');
|
desc.append(" [-proplist " + PropListFileName + ']');
|
||||||
}
|
}
|
||||||
|
if (DerivedPropsFileName == null) {
|
||||||
|
DerivedPropsFileName = DefaultDerivedPropsFileName;
|
||||||
|
desc.append(" [-derivedprops " + DerivedPropsFileName + ']');
|
||||||
|
}
|
||||||
if (TemplateFileName == null) {
|
if (TemplateFileName == null) {
|
||||||
TemplateFileName = (Csyntax ? DefaultCTemplateFileName
|
TemplateFileName = (Csyntax ? DefaultCTemplateFileName
|
||||||
: DefaultJavaTemplateFileName);
|
: DefaultJavaTemplateFileName);
|
||||||
@ -1954,6 +1976,7 @@ OUTER: for (int i = 0; i < n; i += m) {
|
|||||||
UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
|
UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
|
||||||
specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
|
specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
|
||||||
PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
|
PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
|
||||||
|
propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane));
|
||||||
|
|
||||||
if (verbose) {
|
if (verbose) {
|
||||||
System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
|
System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
|
||||||
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
||||||
*
|
*
|
||||||
* This code is free software; you can redistribute it and/or modify it
|
* This code is free software; you can redistribute it and/or modify it
|
||||||
@ -31,7 +31,8 @@ import java.io.*;
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* A PropList object contains the lists of code points that have
|
* A PropList object contains the lists of code points that have
|
||||||
* the same Unicode property defined in PropList.txt
|
* the same Unicode property defined in PropList.txt and
|
||||||
|
* DerivedCoreProperties.txt
|
||||||
*
|
*
|
||||||
* @author Xueming Shen
|
* @author Xueming Shen
|
||||||
*/
|
*/
|
||||||
@ -51,8 +52,13 @@ public class PropList {
|
|||||||
return propMap.keySet();
|
return propMap.keySet();
|
||||||
}
|
}
|
||||||
|
|
||||||
private Map<String, ArrayList<Integer>> propMap =
|
public void putAll(PropList pl) {
|
||||||
new LinkedHashMap<String, ArrayList<Integer>>();
|
pl.names().stream()
|
||||||
|
.forEach(name -> propMap.put(name, pl.codepoints(name)));
|
||||||
|
}
|
||||||
|
|
||||||
|
private Map<String, List<Integer>> propMap =
|
||||||
|
new LinkedHashMap<String, List<Integer>>();
|
||||||
|
|
||||||
private PropList(File file, int plane) throws IOException {
|
private PropList(File file, int plane) throws IOException {
|
||||||
|
|
||||||
@ -78,7 +84,7 @@ public class PropList {
|
|||||||
start &= 0xffff;
|
start &= 0xffff;
|
||||||
end &= 0xffff;
|
end &= 0xffff;
|
||||||
|
|
||||||
ArrayList<Integer> list = propMap.get(name);
|
List<Integer> list = propMap.get(name);
|
||||||
if (list == null) {
|
if (list == null) {
|
||||||
list = new ArrayList<Integer>();
|
list = new ArrayList<Integer>();
|
||||||
propMap.put(name, list);
|
propMap.put(name, list);
|
||||||
|
@ -9917,7 +9917,18 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
|
* <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
|
||||||
* <li> {@link #getType(char) getType(ch)} returns
|
* <li> {@link #getType(char) getType(ch)} returns
|
||||||
* {@code LETTER_NUMBER}.
|
* {@code LETTER_NUMBER}.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||||
|
* {@code Other_ID_Start}</a> character.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||||
|
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||||
|
* with the following profile of UAX31:
|
||||||
|
* <pre>
|
||||||
|
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
|
||||||
|
* </pre>
|
||||||
|
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
|
||||||
|
* compatibility.
|
||||||
*
|
*
|
||||||
* <p><b>Note:</b> This method cannot handle <a
|
* <p><b>Note:</b> This method cannot handle <a
|
||||||
* href="#supplementary"> supplementary characters</a>. To support
|
* href="#supplementary"> supplementary characters</a>. To support
|
||||||
@ -9947,7 +9958,19 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
* returns {@code true}
|
* returns {@code true}
|
||||||
* <li> {@link #getType(int) getType(codePoint)}
|
* <li> {@link #getType(int) getType(codePoint)}
|
||||||
* returns {@code LETTER_NUMBER}.
|
* returns {@code LETTER_NUMBER}.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||||
|
* {@code Other_ID_Start}</a> character.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||||
|
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||||
|
* with the following profile of UAX31:
|
||||||
|
* <pre>
|
||||||
|
* Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
|
||||||
|
* </pre>
|
||||||
|
* {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
|
||||||
|
* compatibility.
|
||||||
|
*
|
||||||
* @param codePoint the character (Unicode code point) to be tested.
|
* @param codePoint the character (Unicode code point) to be tested.
|
||||||
* @return {@code true} if the character may start a Unicode
|
* @return {@code true} if the character may start a Unicode
|
||||||
* identifier; {@code false} otherwise.
|
* identifier; {@code false} otherwise.
|
||||||
@ -9975,7 +9998,22 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
* <li> it is a non-spacing mark
|
* <li> it is a non-spacing mark
|
||||||
* <li> {@code isIdentifierIgnorable} returns
|
* <li> {@code isIdentifierIgnorable} returns
|
||||||
* {@code true} for this character.
|
* {@code true} for this character.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||||
|
* {@code Other_ID_Start}</a> character.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
|
||||||
|
* {@code Other_ID_Continue}</a> character.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||||
|
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||||
|
* with the following profile of UAX31:
|
||||||
|
* <pre>
|
||||||
|
* Continue := Start + ID_Continue + ignorable
|
||||||
|
* Medial := empty
|
||||||
|
* ignorable := isIdentifierIgnorable(char) returns true for the character
|
||||||
|
* </pre>
|
||||||
|
* {@code ignorable} is added to {@code Continue} for backward
|
||||||
|
* compatibility.
|
||||||
*
|
*
|
||||||
* <p><b>Note:</b> This method cannot handle <a
|
* <p><b>Note:</b> This method cannot handle <a
|
||||||
* href="#supplementary"> supplementary characters</a>. To support
|
* href="#supplementary"> supplementary characters</a>. To support
|
||||||
@ -10010,7 +10048,23 @@ class Character implements java.io.Serializable, Comparable<Character> {
|
|||||||
* <li> it is a non-spacing mark
|
* <li> it is a non-spacing mark
|
||||||
* <li> {@code isIdentifierIgnorable} returns
|
* <li> {@code isIdentifierIgnorable} returns
|
||||||
* {@code true} for this character.
|
* {@code true} for this character.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Start">
|
||||||
|
* {@code Other_ID_Start}</a> character.
|
||||||
|
* <li> it is an <a href="http://www.unicode.org/reports/tr44/#Other_ID_Continue">
|
||||||
|
* {@code Other_ID_Continue}</a> character.
|
||||||
* </ul>
|
* </ul>
|
||||||
|
* <p>
|
||||||
|
* This method conforms to <a href="https://unicode.org/reports/tr31/#R1">
|
||||||
|
* UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
|
||||||
|
* with the following profile of UAX31:
|
||||||
|
* <pre>
|
||||||
|
* Continue := Start + ID_Continue + ignorable
|
||||||
|
* Medial := empty
|
||||||
|
* ignorable := isIdentifierIgnorable(int) returns true for the character
|
||||||
|
* </pre>
|
||||||
|
* {@code ignorable} is added to {@code Continue} for backward
|
||||||
|
* compatibility.
|
||||||
|
*
|
||||||
* @param codePoint the character (Unicode code point) to be tested.
|
* @param codePoint the character (Unicode code point) to be tested.
|
||||||
* @return {@code true} if the character may be part of a
|
* @return {@code true} if the character may be part of a
|
||||||
* Unicode identifier; {@code false} otherwise.
|
* Unicode identifier; {@code false} otherwise.
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* @test
|
* @test
|
||||||
* @bug 8202771 8221431
|
* @bug 8202771 8221431 8229831
|
||||||
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
|
* @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
|
||||||
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
|
* /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
|
||||||
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
|
* /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
|
||||||
@ -182,7 +182,7 @@ public class CharPropTest {
|
|||||||
|
|
||||||
private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
|
private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
|
||||||
boolean actual = Character.isUnicodeIdentifierStart(codePoint);
|
boolean actual = Character.isUnicodeIdentifierStart(codePoint);
|
||||||
boolean expected = isUnicodeIdentifierStart(category);
|
boolean expected = isUnicodeIdentifierStart(codePoint, category);
|
||||||
if (actual != expected) {
|
if (actual != expected) {
|
||||||
printDiff(codePoint, "isUnicodeIdentifierStart", actual, expected);
|
printDiff(codePoint, "isUnicodeIdentifierStart", actual, expected);
|
||||||
}
|
}
|
||||||
@ -266,14 +266,33 @@ public class CharPropTest {
|
|||||||
|| isIdentifierIgnorable(codePoint, category);
|
|| isIdentifierIgnorable(codePoint, category);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isUnicodeIdentifierStart(String category) {
|
private static boolean isUnicodeIdentifierStart(int codePoint, String category) {
|
||||||
return isLetter(category) || category.equals("Nl");
|
return isLetter(category) || category.equals("Nl")
|
||||||
|
|| isOtherIDStart(codePoint);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
|
private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
|
||||||
return isLetter(category) || category.equals("Pc") || category.equals("Nd")
|
return isLetter(category) || category.equals("Pc") || category.equals("Nd")
|
||||||
|| category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
|
|| category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
|
||||||
|| isIdentifierIgnorable(codePoint, category);
|
|| isIdentifierIgnorable(codePoint, category)
|
||||||
|
|| isOtherIDStart(codePoint)
|
||||||
|
|| isOtherIDContinue(codePoint);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isOtherIDStart(int codePoint) {
|
||||||
|
return codePoint == 0x1885 ||
|
||||||
|
codePoint == 0x1886 ||
|
||||||
|
codePoint == 0x2118 ||
|
||||||
|
codePoint == 0x212E ||
|
||||||
|
codePoint == 0x309B ||
|
||||||
|
codePoint == 0x309C;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static boolean isOtherIDContinue(int codePoint) {
|
||||||
|
return codePoint == 0x00B7 ||
|
||||||
|
codePoint == 0x0387 ||
|
||||||
|
(codePoint >= 0x1369 && codePoint <= 0x1371) ||
|
||||||
|
codePoint == 0x19DA;
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
|
private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
|
||||||
|
@ -24,8 +24,9 @@
|
|||||||
|
|
||||||
/**
|
/**
|
||||||
* @test
|
* @test
|
||||||
* @bug 7037261 7070436 7198195 8032446 8072600 8221431
|
* @bug 7037261 7070436 7198195 8032446 8072600 8221431 8229831
|
||||||
* @summary Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic
|
* @summary Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic/
|
||||||
|
* isUnicodeIdentifierStart/isUnicodeIdentifierPart
|
||||||
* @library /lib/testlibrary/java/lang
|
* @library /lib/testlibrary/java/lang
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -36,47 +37,17 @@ import static java.lang.Character.*;
|
|||||||
|
|
||||||
public class CheckProp {
|
public class CheckProp {
|
||||||
|
|
||||||
public static void main(String[] args) throws IOException {
|
public static void main(String[] args) {
|
||||||
File fPropList = UCDFiles.PROP_LIST.toFile();
|
Map<String, List<Integer>> propMap = new LinkedHashMap<>();
|
||||||
int i, j;
|
List.of(UCDFiles.PROP_LIST.toFile(), UCDFiles.DERIVED_PROPS.toFile()).stream()
|
||||||
BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
|
.forEach(f -> readPropMap(propMap, f));
|
||||||
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
|
|
||||||
Map<String, ArrayList<Integer>> propMap = new LinkedHashMap<>();
|
|
||||||
|
|
||||||
String line = null;
|
|
||||||
int lineNo = 0;
|
|
||||||
while ((line = sbfr.readLine()) != null) {
|
|
||||||
lineNo++;
|
|
||||||
if (line.length() <= 1 || line.charAt(0) == '#') {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
m.reset(line);
|
|
||||||
if (m.matches()) {
|
|
||||||
int start = Integer.parseInt(m.group(1), 16);
|
|
||||||
int end = (m.group(2)==null)?start
|
|
||||||
:Integer.parseInt(m.group(2), 16);
|
|
||||||
String name = m.group(3);
|
|
||||||
|
|
||||||
ArrayList<Integer> list = propMap.get(name);
|
|
||||||
if (list == null) {
|
|
||||||
list = new ArrayList<Integer>();
|
|
||||||
propMap.put(name, list);
|
|
||||||
}
|
|
||||||
while (start <= end)
|
|
||||||
list.add(start++);
|
|
||||||
} else {
|
|
||||||
System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
sbfr.close();
|
|
||||||
//for (String name: propMap.keySet()) {
|
|
||||||
// System.out.printf("%s %d%n", name, propMap.get(name).size());
|
|
||||||
//}
|
|
||||||
|
|
||||||
Integer[] otherLowercase = propMap.get("Other_Lowercase").toArray(new Integer[0]);
|
Integer[] otherLowercase = propMap.get("Other_Lowercase").toArray(new Integer[0]);
|
||||||
Integer[] otherUppercase = propMap.get("Other_Uppercase").toArray(new Integer[0]);
|
Integer[] otherUppercase = propMap.get("Other_Uppercase").toArray(new Integer[0]);
|
||||||
Integer[] otherAlphabetic = propMap.get("Other_Alphabetic").toArray(new Integer[0]);
|
Integer[] otherAlphabetic = propMap.get("Other_Alphabetic").toArray(new Integer[0]);
|
||||||
Integer[] ideographic = propMap.get("Ideographic").toArray(new Integer[0]);
|
Integer[] ideographic = propMap.get("Ideographic").toArray(new Integer[0]);
|
||||||
|
Integer[] IDStart = propMap.get("ID_Start").toArray(new Integer[0]);
|
||||||
|
Integer[] IDContinue = propMap.get("ID_Continue").toArray(new Integer[0]);
|
||||||
|
|
||||||
int fails = 0;
|
int fails = 0;
|
||||||
for (int cp = MIN_CODE_POINT; cp < MAX_CODE_POINT; cp++) {
|
for (int cp = MIN_CODE_POINT; cp < MAX_CODE_POINT; cp++) {
|
||||||
@ -111,8 +82,63 @@ public class CheckProp {
|
|||||||
fails++;
|
fails++;
|
||||||
System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
|
System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
|
||||||
}
|
}
|
||||||
|
if (isUnicodeIdentifierStart(cp) !=
|
||||||
|
(cp == 0x2E2F ||
|
||||||
|
Arrays.binarySearch(IDStart, cp) >= 0))
|
||||||
|
{
|
||||||
|
fails++;
|
||||||
|
System.err.printf("Wrong isUnicodeIdentifierStart(U+%04x)\n", cp);
|
||||||
|
}
|
||||||
|
if (isUnicodeIdentifierPart(cp) !=
|
||||||
|
(isIdentifierIgnorable(cp) ||
|
||||||
|
cp == 0x2E2F ||
|
||||||
|
Arrays.binarySearch(IDContinue, cp) >= 0))
|
||||||
|
{
|
||||||
|
fails++;
|
||||||
|
System.err.printf("Wrong isUnicodeIdentifierPart(U+%04x)\n", cp);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if (fails != 0)
|
if (fails != 0)
|
||||||
throw new RuntimeException("CheckProp failed=" + fails);
|
throw new RuntimeException("CheckProp failed=" + fails);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void readPropMap(Map<String, List<Integer>> propMap, File fPropList) {
|
||||||
|
try {
|
||||||
|
BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
|
||||||
|
Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
|
||||||
|
|
||||||
|
String line = null;
|
||||||
|
int lineNo = 0;
|
||||||
|
while ((line = sbfr.readLine()) != null) {
|
||||||
|
lineNo++;
|
||||||
|
if (line.length() <= 1 || line.charAt(0) == '#') {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
m.reset(line);
|
||||||
|
if (m.matches()) {
|
||||||
|
int start = Integer.parseInt(m.group(1), 16);
|
||||||
|
int end = (m.group(2)==null)?start
|
||||||
|
:Integer.parseInt(m.group(2), 16);
|
||||||
|
String name = m.group(3);
|
||||||
|
|
||||||
|
List<Integer> list = propMap.get(name);
|
||||||
|
if (list == null) {
|
||||||
|
list = new ArrayList<Integer>();
|
||||||
|
propMap.put(name, list);
|
||||||
|
}
|
||||||
|
while (start <= end)
|
||||||
|
list.add(start++);
|
||||||
|
} else {
|
||||||
|
System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
sbfr.close();
|
||||||
|
} catch (IOException ioe) {
|
||||||
|
throw new UncheckedIOException(ioe);
|
||||||
|
}
|
||||||
|
|
||||||
|
//for (String name: propMap.keySet()) {
|
||||||
|
// System.out.printf("%s %d%n", name, propMap.get(name).size());
|
||||||
|
//}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -36,6 +36,8 @@ public class UCDFiles {
|
|||||||
|
|
||||||
public static Path BLOCKS =
|
public static Path BLOCKS =
|
||||||
UCD_DIR.resolve("Blocks.txt");
|
UCD_DIR.resolve("Blocks.txt");
|
||||||
|
public static Path DERIVED_PROPS =
|
||||||
|
UCD_DIR.resolve("DerivedCoreProperties.txt");
|
||||||
public static Path GRAPHEME_BREAK_PROPERTY =
|
public static Path GRAPHEME_BREAK_PROPERTY =
|
||||||
UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
|
UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
|
||||||
public static Path GRAPHEME_BREAK_TEST =
|
public static Path GRAPHEME_BREAK_TEST =
|
||||||
|
Loading…
Reference in New Issue
Block a user