mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-23 21:01:08 +00:00
246c6b03a7
This commit ensures that the following operators use category I from MathML Core's operator dictionary [1] [2]: U+1EEF0 ARABIC MATHEMATICAL OPERATOR MEEM WITH HAH WITH TATWEEL U+1EEF1 ARABIC MATHEMATICAL OPERATOR HAH WITH DAL which corresponds to zero lspace/rspace and stretchy. There should already be exhaustive WPT tests operator-dictionary-* to check these and other properties, but they may be shadowed by existing failures or Firefox bugs, so add some more specific reftests for spacing and stretching. However, nsMathMLmoFrame and nsMathMLChar don't handle non-BMP characters very well, so only the first one currently passes. Also tweak updateOperatorDictionary.pl to ignore these special operators. [1] https://w3c.github.io/mathml-core/#dfn-algorithm-to-determine-the-category-of-an-operator [2] https://w3c.github.io/mathml-core/#operator-dictionary-categories-values Differential Revision: https://phabricator.services.mozilla.com/D157788
460 lines
14 KiB
Perl
Executable File
460 lines
14 KiB
Perl
Executable File
#!/usr/bin/perl
|
|
# -*- Mode: Perl; tab-width: 2; indent-tabs-mode: nil; -*-
|
|
# This Source Code Form is subject to the terms of the Mozilla Public
|
|
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
|
|
use XML::LibXSLT;
|
|
use XML::LibXML;
|
|
use LWP::Simple;
|
|
|
|
# Files written by this script.
$FILE_UNICODE        = 'unicode.xml';
$FILE_DICTIONARY     = 'dictionary.xml';
$FILE_DIFFERENCES    = 'differences.txt';
$FILE_NEW_DICTIONARY = 'new_dictionary.txt';
$FILE_SYNTAX_ERRORS  = 'syntax_errors.txt';

# Our own operator dictionary (a property file).
$MOZ_DICTIONARY = 'mathfont.properties';

# Dictionary provided by the W3C in "XML Entity Definitions for Characters".
$WG_DICTIONARY_URL =
  'https://raw.githubusercontent.com/w3c/xml-entities/gh-pages/unicode.xml';

# XSL stylesheet extracting the relevant data from the W3C dictionary.
$DICTIONARY_XSL = 'operatorDictionary.xsl';

# W3C dictionary after transformation with operatorDictionary.xsl.
$WG_DICTIONARY = $FILE_DICTIONARY;
|
|
|
|
# Validate the command line: a known command is required, and each command
# accepts a limited number of extra arguments. Anything else prints the usage.
{
  # Highest value of $#ARGV accepted for each command.
  my %max_last_index_for = (
    download => 1,
    compare  => 1,
    check    => 0,
    clean    => 0,
  );
  my $command = @ARGV ? $ARGV[0] : undef;
  my $accepted = defined($command)
    && exists($max_last_index_for{$command})
    && $#ARGV <= $max_last_index_for{$command};
  usage() unless $accepted;
}
|
|
|
|
# "download" mode: fetch the W3C dictionary (from the URL given as second
# argument, or from the default $WG_DICTIONARY_URL) into $FILE_UNICODE, convert
# it into $FILE_DICTIONARY with the $DICTIONARY_XSL stylesheet, then quit.
if ($ARGV[0] eq "download") {
  if ($#ARGV == 1) {
    $WG_DICTIONARY_URL = $ARGV[1];
  }
  print "Downloading $WG_DICTIONARY_URL...\n";
  # getstore() returns the HTTP status code. Report a failed download instead
  # of silently converting whatever $FILE_UNICODE is already on disk.
  my $status = getstore($WG_DICTIONARY_URL, $FILE_UNICODE);
  if (!is_success($status)) {
    warn "warning: download of $WG_DICTIONARY_URL failed (status $status); " .
         "using the existing $FILE_UNICODE, if any\n";
  }

  print "Converting $FILE_UNICODE into $FILE_DICTIONARY...\n";
  my $xslt = XML::LibXSLT->new();
  my $source = XML::LibXML->load_xml(location => $FILE_UNICODE);
  my $style_doc = XML::LibXML->load_xml(location => $DICTIONARY_XSL,
                                        no_cdata=>1);
  my $stylesheet = $xslt->parse_stylesheet($style_doc);
  my $results = $stylesheet->transform($source);

  # Three-argument open with a lexical handle: the file name can never be
  # misread as an open mode and the handle does not leak into package scope.
  open(my $file, '>', $FILE_DICTIONARY) ||
    die ("Couldn't open $FILE_DICTIONARY!");
  print {$file} $stylesheet->output_as_bytes($results);
  # Buffered write errors only surface when the handle is closed.
  close($file) || die ("Couldn't write $FILE_DICTIONARY: $!");
  exit 0;
}
|
|
|
|
# "clean" mode: remove every file this script may have written, then quit.
if ($ARGV[0] eq "clean") {
  my @generated_files = (
    $FILE_UNICODE,
    $FILE_DICTIONARY,
    $FILE_DIFFERENCES,
    $FILE_NEW_DICTIONARY,
    $FILE_SYNTAX_ERRORS,
  );
  unlink(@generated_files);
  exit 0;
}
|
|
|
|
# "compare" mode accepts an alternative dictionary file as second argument.
$WG_DICTIONARY = $ARGV[1] if ($#ARGV == 1 && $ARGV[0] eq "compare");
|
|
|
|
################################################################################
# structure of the dictionary used by this script:
# - key: same as in mathfont.properties
# - table (indices 3, 11 and 12 are never assigned by this script):
#    index | value
#      0   | description
#      1   | lspace
#      2   | rspace
#      4   | largeop
#      5   | movablelimits
#      6   | stretchy
#      7   | separator
#      8   | accent
#      9   | fence
#     10   | symmetric
#     13   | direction

# 1) build %moz_hash from $MOZ_DICTIONARY

print "loading $MOZ_DICTIONARY...\n";
open(my $moz_file, '<', $MOZ_DICTIONARY) ||
  die ("Couldn't open $MOZ_DICTIONARY!");

print "building dictionary...\n";
while (my $line = <$moz_file>) {
  next unless ($line =~ m/^operator\./);

  # Expected syntax: "<key> = <properties> # <description>".
  # The match result must be checked: after a failed match the capture
  # variables still hold the values of an earlier successful match, which
  # would store a bogus entry.
  unless ($line =~ m/^([\w|\.|\\]*)\s=\s(.*)\s#\s(.*)$/) {
    warn "warning: ignoring malformed entry at $MOZ_DICTIONARY line $.: $line";
    next;
  }

  # 1.1) build the key
  my ($key, $properties, $description) = ($1, $2, $3);

  # 1.2) build the array
  my @value = ();
  $value[0] = $description;
  # Spacing defaults to "5" when not specified. The leading ".*" is greedy, so
  # the last occurrence wins if a property is repeated.
  $value[1] = ($properties =~ m/^.*lspace:(\d)/) ? $1 : "5";
  $value[2] = ($properties =~ m/^.*rspace:(\d)/) ? $1 : "5";
  # Boolean properties: true when the keyword appears among the properties.
  $value[4] = scalar($properties =~ m/largeop/);
  $value[5] = scalar($properties =~ m/movablelimits/);
  $value[6] = scalar($properties =~ m/stretchy/);
  $value[7] = scalar($properties =~ m/separator/);
  $value[8] = scalar($properties =~ m/accent/);
  $value[9] = scalar($properties =~ m/fence/);
  $value[10] = scalar($properties =~ m/symmetric/);
  $value[13] = ($properties =~ m/^.*direction:([a-z]*)/) ? $1 : "";

  # 1.3) save the key and value
  $moz_hash{$key} = [ @value ];
}

close($moz_file);
|
|
|
|
################################################################################
# 2) If mode "check", verify validity of our operator dictionary and quit.
#    If mode "compare", go to step 3)

if ($ARGV[0] eq "check") {
  print "checking operator dictionary...\n";
  open(my $file_syntax_errors, '>', $FILE_SYNTAX_ERRORS) ||
    die ("Couldn't open $FILE_SYNTAX_ERRORS!");

  my $nb_errors = 0;
  # None of the checks below emits a warning; the counter is only reported.
  my $nb_warnings = 0;

  # check the validity of our private data
  # (keys are sorted so that the report is written in a stable order)
  foreach my $key (sort keys %moz_hash) {
    my @moz = @{ $moz_hash{$key} };

    # Must be set before the first check: resetting it after the key checks
    # would discard their result and the faulty entry would not be printed.
    my $valid = 1;

    if ($key =~ /\\u.+\\u.+\\u.+/) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors}
        "error: \"$key\" has more than 2 characters\n";
    }

    if ($key =~ /\\u20D2\./ || $key =~ /\\u0338\./) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors}
        "error: \"$key\" ends with character U+20D2 or U+0338\n";
    }

    if (!($moz[13] eq "" ||
          $moz[13] eq "horizontal" ||
          $moz[13] eq "vertical")) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors} "error: invalid direction \"$moz[13]\"\n";
    }

    if ($moz[4] && !($moz[13] eq "vertical")) {
      $valid = 0;
      $nb_errors++;
      print {$file_syntax_errors}
        "error: operator is largeop but does not have vertical direction\n";
    }

    if (!$valid) {
      print {$file_syntax_errors} generateEntry($key, @moz);
      print {$file_syntax_errors} "\n";
    }
  }

  # check that all forms have the same direction.
  foreach my $key (sort keys %moz_hash) {
    # skip operators already removed from the hash table together with one of
    # their other forms.
    next unless (exists($moz_hash{$key}));

    # a key that does not end with a form cannot be grouped with other forms.
    next unless ($key =~ m/^([\w|\.|\\]*)\.(prefix|infix|postfix)$/);
    my $operator = $1;

    # keys of the forms that exist for this operator, in the order
    # prefix, infix, postfix. $key itself is always one of them.
    my @form_keys = grep { exists($moz_hash{$_}) }
                    map { "$operator.$_" } ("prefix", "infix", "postfix");

    # all the forms agree if and only if each of them agrees with the first.
    my $direction = $moz_hash{$form_keys[0]}[13];
    my @mismatches = grep { !($moz_hash{$_}[13] eq $direction) } @form_keys;

    if (@mismatches) {
      $nb_errors++;
      print {$file_syntax_errors}
        "error: operator has a stretchy form, but all forms";
      print {$file_syntax_errors}
        " have not the same direction\n";
      foreach my $form_key (@form_keys) {
        print {$file_syntax_errors}
          generateEntry($form_key, @{ $moz_hash{$form_key} });
      }
      print {$file_syntax_errors} "\n";
    }

    # every form of this operator has been handled: remove them all.
    delete @moz_hash{@form_keys};
  }

  close($file_syntax_errors);
  print "\n";
  if ($nb_errors > 0 || $nb_warnings > 0) {
    print "$nb_errors error(s) found\n";
    print "$nb_warnings warning(s) found\n";
    print "See output file $FILE_SYNTAX_ERRORS.\n\n";
  } else {
    print "No error found.\n\n";
  }

  exit 0;
}
|
|
|
|
################################################################################
# 3) build %wg_hash and @wg_keys from the page $WG_DICTIONARY

print "loading $WG_DICTIONARY...\n";
my $parser = XML::LibXML->new();
my $doc = $parser->parse_file($WG_DICTIONARY);

print "building dictionary...\n";
@wg_keys = ();

foreach my $entry ($doc->findnodes('/root/entry')) {
  # 3.1) build the key
  my $unicode = $entry->getAttribute("unicode");

  # Skip non-BMP Arabic characters that are handled specially.
  # This has to be a string comparison: with "==" both operands are converted
  # to the number 0, which makes the test true for every entry.
  next if ($unicode eq "U1EEF0" || $unicode eq "U1EEF1");

  # Each "-"-separated code point of the form [U]0NNNN is appended as \uNNNN.
  my $key = "operator.";
  my $remaining = "$unicode-";
  while ($remaining =~ m/^U?0(\w*)-(.*)$/) {
    # Concatenate .\uNNNN
    $key = "$key\\u$1";
    $remaining = $2;
  }

  my $form = $entry->getAttribute("form");
  $key = "$key.$form";

  # 3.2) build the array
  my @value = ();
  $value[0] = lc($entry->getAttribute("description"));
  # spacing defaults to "5" when the attribute is missing or empty
  $value[1] = $entry->getAttribute("lspace");
  if (!defined($value[1]) || $value[1] eq "") { $value[1] = "5"; }
  $value[2] = $entry->getAttribute("rspace");
  if (!defined($value[2]) || $value[2] eq "") { $value[2] = "5"; }

  my $properties = $entry->getAttribute("properties");
  $properties = "" unless (defined($properties));
  $value[4] = scalar($properties =~ m/largeop/);
  $value[5] = scalar($properties =~ m/movablelimits/);
  $value[6] = scalar($properties =~ m/stretchy/);
  $value[7] = scalar($properties =~ m/separator/);
  $value[9] = scalar($properties =~ m/fence/);
  $value[10] = scalar($properties =~ m/symmetric/);

  # not stored in the WG dictionary
  $value[8] = ""; # accent
  $value[13] = ""; # direction

  # 3.3) save the key and value
  push(@wg_keys, $key);
  $wg_hash{$key} = [ @value ];
}
# Stored in reverse order: step 4 consumes the keys with pop().
@wg_keys = reverse(@wg_keys);
|
|
|
|
################################################################################
# 4) Compare the two dictionaries and output the result

print "comparing dictionaries...\n";
open(my $file_differences, '>', $FILE_DIFFERENCES) ||
  die ("Couldn't open $FILE_DIFFERENCES!");
open(my $file_new_dictionary, '>', $FILE_NEW_DICTIONARY) ||
  die ("Couldn't open $FILE_NEW_DICTIONARY!");

my ($conflicting, $conflicting_stretching) = (0, 0);
my ($new, $new_stretching) = (0, 0);
my ($obsolete, $obsolete_stretching) = (0, 0);
my $unchanged = 0;

# 4.1) look to the entries of the WG dictionary
# (@wg_keys was reversed in step 3, so pop() yields the keys in the order in
# which they were read from the WG dictionary)
while (defined(my $key = pop(@wg_keys))) {
  my @wg = @{ delete($wg_hash{$key}) || [] };
  my $wg_value = generateCommon(@wg);

  if (exists($moz_hash{$key})) {
    # entry is in both dictionary
    my @moz = @{ delete($moz_hash{$key}) };
    my $moz_value = generateCommon(@moz);
    if ($moz_value ne $wg_value) {
      # conflicting entry
      print {$file_differences} "[conflict]";
      $conflicting++;
      if ($moz[6] != $wg[6]) {
        print {$file_differences} "[stretching]";
        $conflicting_stretching++;
      }
      print {$file_differences} " - $key ($wg[0])\n";
      print {$file_differences} "-$moz_value\n+$wg_value\n\n";
    } else {
      # unchanged entry
      $unchanged++;
    }
    # In both cases the new dictionary gets the WG data completed with our
    # private data.
    print {$file_new_dictionary}
      completeCommon($wg_value, $key, @moz, @wg);
  } else {
    # we don't have this entry in our dictionary yet
    print {$file_differences} "[new entry]";
    $new++;
    if ($wg[6]) {
      print {$file_differences} "[stretching]";
      $new_stretching++;
    }
    print {$file_differences} " - $key ($wg[0])\n";
    print {$file_differences} "-\n+$wg_value\n\n";
    print {$file_new_dictionary} completeCommon($wg_value, $key, (), @wg);
  }
}

print {$file_new_dictionary}
  "\n# Entries below are not part of the official MathML dictionary\n\n";

# 4.2) look in our dictionary the remaining entries
foreach my $key (sort(keys %moz_hash)) {
  my @moz = @{ $moz_hash{$key} };
  my $moz_value = generateCommon(@moz);
  print {$file_differences} "[obsolete entry]";
  $obsolete++;
  if ($moz[6]) {
    print {$file_differences} "[stretching]";
    $obsolete_stretching++;
  }
  print {$file_differences} " - $key ($moz[0])\n";
  print {$file_differences} "-$moz_value\n+\n\n";
  print {$file_new_dictionary} completeCommon($moz_value, $key, (), @moz);
}

# Buffered write errors only surface when the handles are closed.
close($file_differences) ||
  die ("Couldn't write $FILE_DIFFERENCES: $!");
close($file_new_dictionary) ||
  die ("Couldn't write $FILE_NEW_DICTIONARY: $!");

print "\n";
print "- $obsolete obsolete entries ";
print "($obsolete_stretching of them are related to stretching)\n";
print "- $unchanged unchanged entries\n";
print "- $conflicting conflicting entries ";
print "($conflicting_stretching of them are related to stretching)\n";
print "- $new new entries ";
print "($new_stretching of them are related to stretching)\n";
print "\nSee output files $FILE_DIFFERENCES and $FILE_NEW_DICTIONARY.\n\n";
# The command is printed with a separating space and the real script name.
print "After having modified the dictionary, please run ";
print "./updateOperatorDictionary.pl check\n\n";
exit 0;
|
|
|
|
################################################################################
|
|
sub usage {
  # Print the command lines accepted by this script, then terminate it.
  my @accepted_commands = (
    "download [unicode.xml]",
    "compare [dictionary.xml]",
    "check",
    "clean",
  );
  print "usage:\n";
  foreach my $command (@accepted_commands) {
    print " ./updateOperatorDictionary.pl $command\n";
  }
  exit 0;
}
|
|
|
|
sub generateCommon {
  # Helper function to generate the string of data shared by both dictionaries.
  #
  # Takes a dictionary entry as a flat list, indexed as described in the
  # "structure of the dictionary" comment. Returns "lspace:N rspace:N"
  # followed by the name of every boolean property that is set, in the order
  # largeop, movablelimits, stretchy, separator, fence, symmetric.
  # The private data (accent, direction) and the description are not included.
  my(@v) = @_;

  # Built in a lexical variable so that no package global is modified.
  my @parts = ("lspace:$v[1]", "rspace:$v[2]");

  # [index in the entry, property name], in output order
  my @shared_properties = (
    [4, "largeop"],
    [5, "movablelimits"],
    [6, "stretchy"],
    [7, "separator"],
    [9, "fence"],
    [10, "symmetric"],
  );
  foreach my $property (@shared_properties) {
    my ($index, $name) = @{$property};
    push(@parts, $name) if ($v[$index]);
  }

  return join(" ", @parts);
}
|
|
|
|
sub completeCommon {
  # Helper to add key and private data to the string made by generateCommon.
  #
  # Arguments: the generateCommon string, the key, then our entry followed by
  # the WG entry, both flattened into the argument list. Perl cannot unpack
  # two arrays from @_ (the first one would take every remaining argument), so
  # the two entries are separated by size: a complete entry has 14 slots
  # (indices 0 to 13). When a single entry is passed, it provides both the
  # private data and the description.
  #
  # Returns the complete dictionary line, terminated by a newline.
  my($entry, $key, @data) = @_;

  my $entry_size = 14;
  my @v_moz = @data;
  my @v_wg = @data;
  if (@data > $entry_size) {
    @v_moz = @data[0 .. $entry_size - 1];
    @v_wg = @data[$entry_size .. $#data];
  }

  $entry = "$key = $entry";

  if ($v_moz[8]) { $entry = "$entry accent"; }
  if ($v_moz[13]) { $entry = "$entry direction:$v_moz[13]"; }

  if (defined($v_moz[0]) && $v_moz[0] ne "") {
    # keep our previous comment
    $entry = "$entry # $v_moz[0]";
  } else {
    # otherwise use the description given by the WG
    my $description = defined($v_wg[0]) ? $v_wg[0] : "";
    $entry = "$entry # $description";
  }

  $entry = "$entry\n";
  return $entry;
}
|
|
|
|
sub generateEntry {
  # Helper function to generate an entry of our operator dictionary.
  #
  # Arguments: the key, followed by the entry of our dictionary as a flat
  # list. Returns the complete line (terminated by a newline) made of the
  # shared data from generateCommon completed by completeCommon, the same
  # entry providing both the private data and the description.
  my($key, @moz) = @_;

  # A lexical is used so that the package global $entry is left untouched.
  my $common = generateCommon(@moz);
  return completeCommon($common, $key, @moz, @moz);
}
|