8229831: Upgrade Character.isUnicodeIdentifierStart/Part() methods to the latest standard

Reviewed-by: rriggs
2024-11-27 06:10:37 +00:00 · 2019-09-05 17:38:54 -07:00 · 2019-09-05 17:38:54 -07:00 · 2aac0e925d
commit 2aac0e925d
parent 4d70cdac4f
13 changed files with 12096 additions and 75 deletions
--- a/make/data/characterdata/CharacterData00.java.template
+++ b/make/data/characterdata/CharacterData00.java.template
@ -115,13 +115,14 @@ class CharacterData00 extends CharacterData {
    }

    boolean isUnicodeIdentifierStart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
+        return (getPropertiesEx(ch) & $$maskIDStart) != 0 ||   // ID_Start bit of the extended properties
+               ch == 0x2E2F;   // U+2E2F VERTICAL TILDE, kept as a start character for backward compatibility
    }

    boolean isUnicodeIdentifierPart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskUnicodePart) != 0);
+        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||   // ID_Continue bit of the extended properties
+               isIdentifierIgnorable(ch) ||   // ignorable characters stay valid parts for backward compatibility
+               ch == 0x2E2F;   // U+2E2F VERTICAL TILDE is a valid start character, hence also a valid part
    }

    boolean isIdentifierIgnorable(int ch) {
--- a/make/data/characterdata/CharacterData01.java.template
+++ b/make/data/characterdata/CharacterData01.java.template
@ -114,13 +114,14 @@ class CharacterData01 extends CharacterData {
    }

    boolean isUnicodeIdentifierStart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDStart) != 0;
    }

    boolean isUnicodeIdentifierPart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskUnicodePart) != 0);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
+               isIdentifierIgnorable(ch);
    }

    boolean isIdentifierIgnorable(int ch) {
--- a/make/data/characterdata/CharacterData02.java.template
+++ b/make/data/characterdata/CharacterData02.java.template
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -113,13 +113,14 @@ class CharacterData02 extends CharacterData {
    }

    boolean isUnicodeIdentifierStart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDStart) != 0;
    }

    boolean isUnicodeIdentifierPart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskUnicodePart) != 0);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
+               isIdentifierIgnorable(ch);
    }

    boolean isIdentifierIgnorable(int ch) {
--- a/make/data/characterdata/CharacterData0E.java.template
+++ b/make/data/characterdata/CharacterData0E.java.template
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2003, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -113,15 +113,16 @@ class CharacterData0E extends CharacterData {
    }

    boolean isUnicodeIdentifierStart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDStart) != 0;
    }

    boolean isUnicodeIdentifierPart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskUnicodePart) != 0);
+        // No U+2E2F check: VERTICAL TILDE is a BMP code point, special-cased in CharacterData00.
+        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
+               isIdentifierIgnorable(ch);
    }

    boolean isIdentifierIgnorable(int ch) {
        int props = getProperties(ch);
        return ((props & $$maskIdentifierInfo) == $$valueIgnorable);
--- a/make/data/characterdata/CharacterDataLatin1.java.template
+++ b/make/data/characterdata/CharacterDataLatin1.java.template
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -133,13 +133,14 @@ class CharacterDataLatin1 extends CharacterData {
    }

    boolean isUnicodeIdentifierStart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskIdentifierInfo) == $$valueUnicodeStart);
+        // No U+2E2F check: VERTICAL TILDE lies outside Latin-1 (U+0000..U+00FF); CharacterData00 handles it.
+        return (getPropertiesEx(ch) & $$maskIDStart) != 0;
    }

    boolean isUnicodeIdentifierPart(int ch) {
-        int props = getProperties(ch);
-        return ((props & $$maskUnicodePart) != 0);
+        // No U+2E2F check: VERTICAL TILDE lies outside Latin-1 (U+0000..U+00FF); CharacterData00 handles it.
+        return (getPropertiesEx(ch) & $$maskIDContinue) != 0 ||
+               isIdentifierIgnorable(ch);
    }

    boolean isIdentifierIgnorable(int ch) {
--- a/make/data/unicodedata/DerivedCoreProperties.txt
+++ b/make/data/unicodedata/DerivedCoreProperties.txt
--- a/make/gensrc/GensrcCharacterData.gmk
+++ b/make/gensrc/GensrcCharacterData.gmk
@ -42,6 +42,7 @@ define SetupCharacterData
 	    -spec $(UNICODEDATA)/UnicodeData.txt \
 	    -specialcasing $(UNICODEDATA)/SpecialCasing.txt \
 	    -proplist $(UNICODEDATA)/PropList.txt \
+	    -derivedprops $(UNICODEDATA)/DerivedCoreProperties.txt \
 	    -o $(SUPPORT_OUTPUTDIR)/gensrc/java.base/java/lang/$1.java \
 	    -usecharforbyte $3

--- a/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/GenerateCharacter.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2002, 2015, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2002, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -73,6 +73,7 @@ public class GenerateCharacter {
    static String DefaultUnicodeSpecFileName  = ROOT + "UnicodeData.txt";
    static String DefaultSpecialCasingFileName = ROOT + "SpecialCasing.txt";
    static String DefaultPropListFileName     = ROOT + "PropList.txt";
+    static String DefaultDerivedPropsFileName = ROOT + "DerivedCoreProperties.txt";
    static String DefaultJavaTemplateFileName = ROOT + "Character.java.template";
    static String DefaultJavaOutputFileName   = ROOT + "Character.java";
    static String DefaultCTemplateFileName    = ROOT + "Character.c.template";
@ -159,6 +160,8 @@ public class GenerateCharacter {
    1 bit Other_Math property
    1 bit Ideographic property
    1 bit Noncharacter codepoint property
+    1 bit ID_Start property
+    1 bit ID_Continue property
    */


@ -190,7 +193,7 @@ public class GenerateCharacter {
    // maskMirrored needs to be long, if up 16-bit
    private static final long maskMirrored          = 0x80000000L;

-    // bit masks identify the 16-bit priperty field described above, in B
+    // bit masks identify the 16-bit property field described above, in B
    // table
    private static final long
        maskOtherLowercase  = 0x100000000L,
@ -198,7 +201,9 @@ public class GenerateCharacter {
        maskOtherAlphabetic = 0x400000000L,
        maskOtherMath       = 0x800000000L,
        maskIdeographic     = 0x1000000000L,
-        maskNoncharacterCP  = 0x2000000000L;
+        maskNoncharacterCP  = 0x2000000000L,
+        maskIDStart         = 0x4000000000L,
+        maskIDContinue      = 0x8000000000L;

    // Can compare masked values with these to determine
    // numeric or lexical types.
@ -367,6 +372,8 @@ public class GenerateCharacter {
        addExProp(result, propList, "Ideographic", maskIdeographic);
        //addExProp(result, propList, "Other_Math", maskOtherMath);
        //addExProp(result, propList, "Noncharacter_CodePoint", maskNoncharacterCP);
+        addExProp(result, propList, "ID_Start", maskIDStart);
+        addExProp(result, propList, "ID_Continue", maskIDContinue);

        return result;
    }
@ -780,6 +787,8 @@ OUTER:  for (int i = 0; i < n; i += m) {
        if (x.equals("maskOtherUppercase")) return "0x" + hex4(maskOtherUppercase >> 32);
        if (x.equals("maskOtherAlphabetic")) return "0x" + hex4(maskOtherAlphabetic >> 32);
        if (x.equals("maskIdeographic")) return "0x" + hex4(maskIdeographic >> 32);
+        if (x.equals("maskIDStart")) return "0x" + hex4(maskIDStart >> 32);
+        if (x.equals("maskIDContinue")) return "0x" + hex4(maskIDContinue >> 32);
        if (x.equals("valueIgnorable")) return "0x" + hex8(valueIgnorable);
        if (x.equals("valueJavaUnicodeStart")) return "0x" + hex8(valueJavaUnicodeStart);
        if (x.equals("valueJavaOnlyStart")) return "0x" + hex8(valueJavaOnlyStart);
@ -1612,6 +1621,7 @@ OUTER:  for (int i = 0; i < n; i += m) {
    static String UnicodeSpecFileName = null; // liu
    static String SpecialCasingFileName = null;
    static String PropListFileName = null;
+    static String DerivedPropsFileName = null;
    static boolean useCharForByte = false;
    static int[] sizes;
    static int bins = 0; // liu; if > 0, then perform search
@ -1739,6 +1749,14 @@ OUTER:  for (int i = 0; i < n; i += m) {
                    PropListFileName = args[++j];
                }
            }
+            else if (args[j].equals("-derivedprops")) {
+                if (j == args.length -1) {
+                    FAIL("File name missing after -derivedprops");
+                }
+                else {
+                    DerivedPropsFileName = args[++j];
+                }
+            }
            else if (args[j].equals("-plane")) {
                if (j == args.length -1) {
                    FAIL("Plane number missing after -plane");
@ -1803,6 +1821,10 @@ OUTER:  for (int i = 0; i < n; i += m) {
            PropListFileName = DefaultPropListFileName;
            desc.append(" [-proplist " + PropListFileName + ']');
        }
+        if (DerivedPropsFileName == null) {
+            DerivedPropsFileName = DefaultDerivedPropsFileName;
+            desc.append(" [-derivedprops " + DerivedPropsFileName + ']');
+        }
        if (TemplateFileName == null) {
            TemplateFileName = (Csyntax ? DefaultCTemplateFileName
                  : DefaultJavaTemplateFileName);
@ -1954,6 +1976,7 @@ OUTER:  for (int i = 0; i < n; i += m) {
            UnicodeSpec[] data = UnicodeSpec.readSpecFile(new File(UnicodeSpecFileName), plane);
            specialCaseMaps = SpecialCaseMap.readSpecFile(new File(SpecialCasingFileName), plane);
            PropList propList = PropList.readSpecFile(new File(PropListFileName), plane);
+            propList.putAll(PropList.readSpecFile(new File(DerivedPropsFileName), plane));

            if (verbose) {
                System.out.println(data.length + " items read from Unicode spec file " + UnicodeSpecFileName); // liu
--- a/make/jdk/src/classes/build/tools/generatecharacter/PropList.java
+++ b/make/jdk/src/classes/build/tools/generatecharacter/PropList.java
@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2011, 2013, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2011, 2019, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
@ -31,7 +31,8 @@ import java.io.*;

 /**
 * A PropList object contains the lists of code points that have
- * the same Unicode property defined in PropList.txt
+ * the same Unicode property defined in PropList.txt and
+ * DerivedCoreProperties.txt
 *
 * @author Xueming Shen
 */
@ -51,8 +52,13 @@ public class PropList {
        return propMap.keySet();
    }

-    private Map<String, ArrayList<Integer>> propMap =
-        new LinkedHashMap<String, ArrayList<Integer>>();
+    public void putAll(PropList pl) {   // adopt every property list held by pl
+        for (String name : pl.names())   // an entry already stored under the same name is replaced
+            propMap.put(name, pl.codepoints(name));
+    }
+
+    private Map<String, List<Integer>> propMap =
+        new LinkedHashMap<>();   // property name -> listed values, kept in insertion order

    private PropList(File file, int plane) throws IOException {

@ -78,7 +84,7 @@ public class PropList {
                start &= 0xffff;
                end &= 0xffff;

-                ArrayList<Integer> list = propMap.get(name);
+                List<Integer> list = propMap.get(name);
                if (list == null) {
                    list = new ArrayList<Integer>();
                    propMap.put(name, list);
--- a/src/java.base/share/classes/java/lang/Character.java
+++ b/src/java.base/share/classes/java/lang/Character.java
@ -9917,7 +9917,18 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <li> {@link #isLetter(char) isLetter(ch)} returns {@code true}
     * <li> {@link #getType(char) getType(ch)} returns
     *      {@code LETTER_NUMBER}.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Start">
+     *      {@code Other_ID_Start}</a> character.
     * </ul>
+     * <p>
+     * This method conforms to the <a href="https://unicode.org/reports/tr31/#R1">
+     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
+     * with the following profile of UAX31:
+     * <pre>
+     * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
+     * </pre>
+     * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
+     * compatibility.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
@ -9947,7 +9958,19 @@ class Character implements java.io.Serializable, Comparable<Character> {
     *      returns {@code true}
     * <li> {@link #getType(int) getType(codePoint)}
     *      returns {@code LETTER_NUMBER}.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Start">
+     *      {@code Other_ID_Start}</a> character.
     * </ul>
+     * <p>
+     * This method conforms to the <a href="https://unicode.org/reports/tr31/#R1">
+     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
+     * with the following profile of UAX31:
+     * <pre>
+     * Start := ID_Start + 'VERTICAL TILDE' (U+2E2F)
+     * </pre>
+     * {@code 'VERTICAL TILDE'} is added to {@code Start} for backward
+     * compatibility.
+     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character may start a Unicode
     *          identifier; {@code false} otherwise.
@ -9975,7 +9998,22 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <li>  it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for this character.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Start">
+     *      {@code Other_ID_Start}</a> character.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Continue">
+     *      {@code Other_ID_Continue}</a> character.
     * </ul>
+     * <p>
+     * This method conforms to the <a href="https://unicode.org/reports/tr31/#R1">
+     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
+     * with the following profile of UAX31:
+     * <pre>
+     * Continue := Start + ID_Continue + ignorable
+     * Medial := empty
+     * ignorable := isIdentifierIgnorable(char) returns true for the character
+     * </pre>
+     * {@code ignorable} is added to {@code Continue} for backward
+     * compatibility.
     *
     * <p><b>Note:</b> This method cannot handle <a
     * href="#supplementary"> supplementary characters</a>. To support
@ -10010,7 +10048,23 @@ class Character implements java.io.Serializable, Comparable<Character> {
     * <li>  it is a non-spacing mark
     * <li> {@code isIdentifierIgnorable} returns
     * {@code true} for this character.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Start">
+     *      {@code Other_ID_Start}</a> character.
+     * <li> it is an <a href="https://www.unicode.org/reports/tr44/#Other_ID_Continue">
+     *      {@code Other_ID_Continue}</a> character.
     * </ul>
+     * <p>
+     * This method conforms to the <a href="https://unicode.org/reports/tr31/#R1">
+     * UAX31-R1: Default Identifiers</a> requirement of the Unicode Standard,
+     * with the following profile of UAX31:
+     * <pre>
+     * Continue := Start + ID_Continue + ignorable
+     * Medial := empty
+     * ignorable := isIdentifierIgnorable(int) returns true for the character
+     * </pre>
+     * {@code ignorable} is added to {@code Continue} for backward
+     * compatibility.
+     *
     * @param   codePoint the character (Unicode code point) to be tested.
     * @return  {@code true} if the character may be part of a
     *          Unicode identifier; {@code false} otherwise.
--- a/test/jdk/java/lang/Character/CharPropTest.java
+++ b/test/jdk/java/lang/Character/CharPropTest.java
@ -23,7 +23,7 @@

 /*
 * @test
- * @bug 8202771 8221431
+ * @bug 8202771 8221431 8229831
 * @summary Check j.l.Character.isDigit/isLetter/isLetterOrDigit/isSpaceChar
 * /isWhitespace/isTitleCase/isISOControl/isIdentifierIgnorable
 * /isJavaIdentifierStart/isJavaIdentifierPart/isUnicodeIdentifierStart
@ -182,7 +182,7 @@ public class CharPropTest {

    private static void isUnicodeIdentifierStartTest(int codePoint, String category) {
        boolean actual = Character.isUnicodeIdentifierStart(codePoint);
-        boolean expected = isUnicodeIdentifierStart(category);
+        boolean expected = isUnicodeIdentifierStart(codePoint, category);
        if (actual != expected) {
            printDiff(codePoint, "isUnicodeIdentifierStart", actual, expected);
        }
@ -266,14 +266,33 @@ public class CharPropTest {
               || isIdentifierIgnorable(codePoint, category);
    }

-    private static boolean isUnicodeIdentifierStart(String category) {
-        return isLetter(category) || category.equals("Nl");
+    private static boolean isUnicodeIdentifierStart(int codePoint, String category) {
+        return isLetter(category) || category.equals("Nl")   // isLetter categories and letter numbers (Nl)
+               || isOtherIDStart(codePoint);   // plus the hard-coded Other_ID_Start code points
    }

    private static boolean isUnicodeIdentifierPart(int codePoint, String category) {
        return isLetter(category) || category.equals("Pc") || category.equals("Nd")
               || category.equals("Nl") || category.equals("Mc") || category.equals("Mn")
-               || isIdentifierIgnorable(codePoint, category);
+               || isIdentifierIgnorable(codePoint, category)   // ignorable characters remain valid parts
+               || isOtherIDStart(codePoint)   // hard-coded Other_ID_Start code points
+               || isOtherIDContinue(codePoint);   // hard-coded Other_ID_Continue code points
+    }
+
+    private static boolean isOtherIDStart(int codePoint) {
+        // The Other_ID_Start code points this test hard-codes.
+        switch (codePoint) {
+            case 0x1885: case 0x1886: case 0x2118:
+            case 0x212E: case 0x309B: case 0x309C: return true;
+            default: return false;
+        }
+    }
+
+    private static boolean isOtherIDContinue(int codePoint) {
+        // The Other_ID_Continue code points this test hard-codes.
+        boolean inRange = 0x1369 <= codePoint && codePoint <= 0x1371;
+        return inRange || codePoint == 0x00B7 || codePoint == 0x0387
+               || codePoint == 0x19DA;
+    }

    private static void printDiff(int codePoint, String method, boolean actual, boolean expected) {
--- a/test/jdk/java/lang/Character/CheckProp.java
+++ b/test/jdk/java/lang/Character/CheckProp.java
@ -24,8 +24,9 @@

 /**
 * @test
- * @bug 7037261 7070436 7198195 8032446 8072600 8221431
- * @summary  Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic
+ * @bug 7037261 7070436 7198195 8032446 8072600 8221431 8229831
+ * @summary  Check j.l.Character.isLowerCase/isUppercase/isAlphabetic/isIdeographic/
+ *              isUnicodeIdentifierStart/isUnicodeIdentifierPart
 * @library /lib/testlibrary/java/lang
 */

@ -36,47 +37,17 @@ import static java.lang.Character.*;

 public class CheckProp {

-    public static void main(String[] args) throws IOException {
-        File fPropList = UCDFiles.PROP_LIST.toFile();
-        int i, j;
-        BufferedReader sbfr = new BufferedReader(new FileReader(fPropList));
-        Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
-        Map<String, ArrayList<Integer>> propMap =  new LinkedHashMap<>();
-
-        String line = null;
-        int lineNo = 0;
-        while ((line = sbfr.readLine()) != null) {
-            lineNo++;
-            if (line.length() <= 1 || line.charAt(0) == '#') {
-                continue;
-            }
-            m.reset(line);
-            if (m.matches()) {
-                int start = Integer.parseInt(m.group(1), 16);
-                int end = (m.group(2)==null)?start
-                          :Integer.parseInt(m.group(2), 16);
-                String name = m.group(3);
-
-                ArrayList<Integer> list = propMap.get(name);
-                if (list == null) {
-                    list = new ArrayList<Integer>();
-                    propMap.put(name, list);
-                }
-                while (start <= end)
-                    list.add(start++);
-            } else {
-                System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
-            }
-        }
-        sbfr.close();
-        //for (String name: propMap.keySet()) {
-        //    System.out.printf("%s    %d%n", name, propMap.get(name).size());
-        //}
+    public static void main(String[] args) {
+        Map<String, List<Integer>> propMap =  new LinkedHashMap<>();
+        List.of(UCDFiles.PROP_LIST.toFile(), UCDFiles.DERIVED_PROPS.toFile()).stream()
+            .forEach(f -> readPropMap(propMap, f));

        Integer[] otherLowercase = propMap.get("Other_Lowercase").toArray(new Integer[0]);
        Integer[] otherUppercase = propMap.get("Other_Uppercase").toArray(new Integer[0]);
        Integer[] otherAlphabetic = propMap.get("Other_Alphabetic").toArray(new Integer[0]);
        Integer[] ideographic = propMap.get("Ideographic").toArray(new Integer[0]);
+        Integer[] IDStart = propMap.get("ID_Start").toArray(new Integer[0]);
+        Integer[] IDContinue = propMap.get("ID_Continue").toArray(new Integer[0]);

        int fails = 0;
        for (int cp = MIN_CODE_POINT; cp < MAX_CODE_POINT; cp++) {
@ -111,8 +82,63 @@ public class CheckProp {
                fails++;
                System.err.printf("Wrong isIdeographic(U+%04x)\n", cp);
            }
+            if (isUnicodeIdentifierStart(cp) !=
+                (cp == 0x2E2F ||
+                 Arrays.binarySearch(IDStart, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isUnicodeIdentifierStart(U+%04x)\n", cp);
+            }
+            if (isUnicodeIdentifierPart(cp) !=
+                (isIdentifierIgnorable(cp) ||
+                 cp == 0x2E2F ||
+                 Arrays.binarySearch(IDContinue, cp) >= 0))
+            {
+                fails++;
+                System.err.printf("Wrong isUnicodeIdentifierPart(U+%04x)\n", cp);
+            }
        }
        if (fails != 0)
            throw new RuntimeException("CheckProp failed=" + fails);
    }
+
+    // Parses one UCD property file and appends every listed code point to propMap under its property name.
+    private static void readPropMap(Map<String, List<Integer>> propMap, File fPropList) {
+        // try-with-resources closes the reader even when parsing throws; IOException is rethrown unchecked
+        try (BufferedReader sbfr = new BufferedReader(new FileReader(fPropList))) {
+            Matcher m = Pattern.compile("(\\p{XDigit}+)(?:\\.{2}(\\p{XDigit}+))?\\s*;\\s+(\\w+)\\s+#.*").matcher("");
+
+            String line = null;
+            int lineNo = 0;
+            while ((line = sbfr.readLine()) != null) {
+                lineNo++;
+                if (line.length() <= 1 || line.charAt(0) == '#') {
+                    continue;
+                }
+                m.reset(line);
+                if (m.matches()) {
+                    int start = Integer.parseInt(m.group(1), 16);
+                    int end = (m.group(2)==null)?start
+                              :Integer.parseInt(m.group(2), 16);
+                    String name = m.group(3);
+
+                    List<Integer> list = propMap.get(name);
+                    if (list == null) {
+                        list = new ArrayList<Integer>();
+                        propMap.put(name, list);
+                    }
+                    while (start <= end)
+                        list.add(start++);
+                } else {
+                    System.out.printf("Warning: Unrecognized line %d <%s>%n", lineNo, line);
+                }
+            }
+        } catch (IOException ioe) {
+            throw new UncheckedIOException(ioe);
+        }
+
+        //for (String name: propMap.keySet()) {
+        //    System.out.printf("%s    %d%n", name, propMap.get(name).size());
+        //}
+    }
 }
--- a/test/jdk/lib/testlibrary/java/lang/UCDFiles.java
+++ b/test/jdk/lib/testlibrary/java/lang/UCDFiles.java
@ -36,6 +36,8 @@ public class UCDFiles {

    public static Path BLOCKS =
        UCD_DIR.resolve("Blocks.txt");
+    public static Path DERIVED_PROPS =
+        UCD_DIR.resolve("DerivedCoreProperties.txt");   // read by CheckProp together with PROP_LIST
    public static Path GRAPHEME_BREAK_PROPERTY =
        UCD_DIR.resolve("auxiliary").resolve("GraphemeBreakProperty.txt");
    public static Path GRAPHEME_BREAK_TEST =