mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-25 05:41:12 +00:00
bug 744357 - implement mappings from Unicode's SpecialCasing.txt for text-transform. r=smontagu
This commit is contained in:
parent
2010c3d770
commit
7cba3e9c76
287
intl/unicharutil/tools/genSpecialCasingData.pl
Executable file
287
intl/unicharutil/tools/genSpecialCasingData.pl
Executable file
@ -0,0 +1,287 @@
|
||||
#!/usr/bin/env perl
|
||||
|
||||
# This Source Code Form is subject to the terms of the Mozilla Public
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
# You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
# This tool is used to extract "special" (one-to-many) case mappings
|
||||
# into a form that can be used by nsTextRunTransformations.
|
||||
|
||||
use strict;
|
||||
|
||||
# Exactly two arguments are required: UnicodeData.txt and SpecialCasing.txt.
# On a wrong argument count the usage text goes to STDERR, so it can never end
# up in a redirected nsSpecialCasingData.cpp, and the script exits with a
# failure status so that calling scripts notice the misuse.
if (@ARGV != 2) {
    print STDERR <<__EOT;
# Run this tool using a command line of the form
#
# perl genSpecialCasingData.pl UnicodeData.txt SpecialCasing.txt
#
# The nsSpecialCasingData.cpp file will be written to standard output.
#
# This tool will also write up-to-date versions of the test files
# all-{upper,lower,title}.html
# and corresponding -ref files in the current directory.
#
__EOT
    exit 1;
}
|
||||
|
||||
# Tables filled from UnicodeData.txt, all keyed by numeric code point.
# The mapping values are the hex strings exactly as they appear in the file.
my %allLower;
my %allUpper;
my %allTitle;
# Multi-code-point canonical decomposition (space-separated hex) => the
# character it belongs to, formatted as at-least-4-digit uppercase hex.
my %compositions;
# Field 2 of UnicodeData.txt (the general category) for each code point.
my %gc;
{
    my $unicodeDataFile = $ARGV[0];
    # Three-argument open: the file name is never parsed for mode characters
    # and surrounding whitespace in it is preserved.
    open my $fh, '<', $unicodeDataFile
        or die "can't open $unicodeDataFile (should be UnicodeData.txt): $!\n";
    while (my $line = <$fh>) {
        chomp $line;
        # A limit of -1 keeps trailing empty fields, so indexes 12..14 are
        # always defined for a well-formed record.
        my @fields = split /;/, $line, -1;
        # Skip blank or truncated lines instead of recording them as U+0000.
        next if @fields < 15;
        next if $fields[1] =~ /</; # ignore ranges etc

        my $usv = hex $fields[0];
        $allUpper{$usv} = $fields[12] if $fields[12] ne '';
        $allLower{$usv} = $fields[13] if $fields[13] ne '';
        $allTitle{$usv} = $fields[14] if $fields[14] ne '';
        $gc{$usv} = $fields[2];

        # we only care about non-singleton canonical decomps:
        # a '<' marks a compatibility decomposition, and a decomposition
        # without a space is a single code point (or empty).
        my $decomp = $fields[5];
        next if $decomp =~ /</ or $decomp !~ / /;
        $compositions{$decomp} = sprintf("%04X", $usv);
    }
    close $fh;
}
|
||||
|
||||
# Multi-character mappings from SpecialCasing.txt, keyed by numeric code point;
# values are space-separated hex code points (see addIfSpecial).
my %specialLower;
my %specialUpper;
my %specialTitle;
# Trailing comment of each accepted SpecialCasing.txt record (the character
# name), keyed by numeric code point.
my %charName;
# The "SpecialCasing-..." and "Date:" comment lines, copied into the output.
my @headerLines;
{
    my $specialCasingFile = $ARGV[1];
    open my $fh, '<', $specialCasingFile
        or die "can't open $specialCasingFile (should be SpecialCasing.txt): $!\n";
    while (my $line = <$fh>) {
        chomp $line;

        # Take the comment text only when this very line has one. Reading $1
        # after a failed match could return the capture of an earlier line.
        my $comment = ($line =~ m/#\s*(.+)$/) ? $1 : '';
        if ($comment =~ /^(SpecialCasing-|Date:)/) {
            push @headerLines, $comment;
            next;
        }

        # Strip the comment and the terminating semicolon of the last field.
        $line =~ s/#.*//;
        $line =~ s/;\s*$//;
        next if $line eq '';

        # Unconditional records have exactly 4 fields (code; lower; title;
        # upper). Anything else, such as a record with an extra condition
        # field, is skipped.
        my @fields = split /; */, $line;
        next unless @fields == 4;

        my $usv = hex $fields[0];
        addIfSpecial(\%specialLower, $usv, $fields[1]);
        addIfSpecial(\%specialTitle, $usv, $fields[2]);
        addIfSpecial(\%specialUpper, $usv, $fields[3]);
        $charName{$usv} = $comment;
    }
    close $fh;
}
|
||||
|
||||
# Start of the generated C++ file: license, "do not edit" notice and includes.
print <<__END__;
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
* You can obtain one at http://mozilla.org/MPL/2.0/. */

/* Auto-generated from files in the Unicode Character Database
by genSpecialCasingData.pl - do not edit! */

#include "nsSpecialCasingData.h"
#include "mozilla/Util.h" // for ArrayLength
#include <stdlib.h> // for bsearch

__END__

# Record which SpecialCasing.txt release the tables came from. A plain loop is
# used because the statement is executed only for its side effect.
print "/* $_ */\n" for @headerLines;

print <<__END__;

using mozilla::unicode::MultiCharMapping;

__END__

# One sorted table per case transformation.
printMappings('Lower', \%specialLower);
printMappings('Upper', \%specialUpper);
printMappings('Title', \%specialTitle);
|
||||
|
||||
# Emit the bsearch comparator and the three Special{Lower,Upper,Title}
# accessors. The heredoc is non-interpolating, so the C++ text below appears
# exactly as it is written to the output (including the macro's line
# continuation backslashes).
print <<'__CPP__';
static int CompareMCM(const void* aKey, const void* aElement)
{
const PRUint32 ch = *static_cast<const PRUint32*>(aKey);
const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
return int(ch) - int(mcm->mOriginalChar);
}

#define MAKE_SPECIAL_CASE_ACCESSOR(which) \
const MultiCharMapping* \
Special##which(PRUint32 aChar) \
{ \
const void* p = bsearch(&aChar, CaseSpecials_##which, \
mozilla::ArrayLength(CaseSpecials_##which), \
sizeof(MultiCharMapping), CompareMCM); \
return static_cast<const MultiCharMapping*>(p); \
}

namespace mozilla {
namespace unicode {

MAKE_SPECIAL_CASE_ACCESSOR(Lower)
MAKE_SPECIAL_CASE_ACCESSOR(Upper)
MAKE_SPECIAL_CASE_ACCESSOR(Title)

} // namespace unicode
} // namespace mozilla
__CPP__
|
||||
|
||||
# Overlay the multi-character mappings on the simple ones, so that the test
# pages written below cover characters with either kind of mapping.
for my $merge ([\%allLower, \%specialLower],
               [\%allUpper, \%specialUpper],
               [\%allTitle, \%specialTitle]) {
    my ($simple, $special) = @$merge;
    addSpecialsTo($simple, $special);
}

# Font URL referenced from the @font-face rule of every generated page.
my $testFont = "../fonts/dejavu-sans/DejaVuSans.ttf";

# all-lower / all-upper share one generator; the title test has its own.
for my $case (['lower', \%allLower], ['upper', \%allUpper]) {
    my ($whichMapping, $table) = @$case;
    genTest($whichMapping, $table);
}
genTitleTest();
|
||||
|
||||
# Print one C++ table "CaseSpecials_<whichMapping>" to standard output.
#
#   $whichMapping  suffix of the table name ('Lower', 'Upper' or 'Title')
#   $hash          ref to a hash: numeric code point => space-separated hex
#                  code points of its mapping
#
# Entries are emitted in ascending code point order (the generated accessors
# look them up with bsearch), each followed by the name from %charName.
# Unused trailing slots are written as 0.
#
# Dies if an entry cannot be represented: the output format has exactly three
# slots per mapping and nsSpecialCasingData.h stores every value in a 16-bit
# PRUnichar, so a longer mapping or a code point above U+FFFF would otherwise
# be truncated silently.
sub printMappings {
    my ($whichMapping, $hash) = @_;
    print "static const MultiCharMapping CaseSpecials_${whichMapping}[] = {\n";
    foreach my $key (sort { $a <=> $b } keys %$hash) {
        my @chars = map { hex $_ } split(/ /, $hash->{$key});
        die sprintf("U+%04X: mapping '%s' has more than 3 code points\n",
                    $key, $hash->{$key})
            if @chars > 3;
        foreach my $value ($key, @chars) {
            die sprintf("U+%04X: U+%04X in mapping '%s' does not fit in 16 bits\n",
                        $key, $value, $hash->{$key})
                if $value > 0xFFFF;
        }
        # pad to the three slots of MultiCharMapping::mMappedChars
        push @chars, 0 while @chars < 3;
        my $name = defined $charName{$key} ? $charName{$key} : '';
        printf " { 0x%04x, {0x%04x, 0x%04x, 0x%04x} }, // %s\n",
            $key, @chars, $name;
    }
    print "};\n\n";
}
|
||||
|
||||
# Record $mapping for code point $usv in %$hash if it is a multi-character
# mapping; single-character mappings are ignored.
#
#   $hash     ref to the hash to update (numeric code point => mapping)
#   $usv      numeric code point being mapped
#   $mapping  space-separated hex code points from SpecialCasing.txt
#
# Before storing, the leading code points are replaced by their precomposed
# character wherever %compositions has one. Only whole code points at the very
# start of the mapping are considered, and composition is repeated until
# nothing more applies, so the result does not depend on hash iteration order.
sub addIfSpecial {
    my ($hash, $usv, $mapping) = @_;
    return unless $mapping =~ / /;

    my @codepoints = split / /, $mapping;
    # only do compositions that start with the initial char
    COMPOSE: while (@codepoints > 1) {
        # try the longest prefix first
        foreach my $length (reverse 2 .. scalar @codepoints) {
            my $prefix = join ' ', @codepoints[0 .. $length - 1];
            next unless exists $compositions{$prefix};
            splice @codepoints, 0, $length, $compositions{$prefix};
            next COMPOSE;
        }
        last;
    }
    $hash->{$usv} = join ' ', @codepoints;
    return;
}
|
||||
|
||||
# Copy every entry of %$specials into %$hash, replacing any entry that is
# already present under the same key.
#
#   $hash      ref to the hash that receives the entries
#   $specials  ref to the hash whose entries are copied (left unchanged)
sub addSpecialsTo {
    my ($hash, $specials) = @_;
    # keys and values of an unmodified hash come back in the same order,
    # so one slice assignment copies all pairs.
    @{$hash}{ keys %$specials } = values %$specials;
    return;
}
|
||||
|
||||
# Write the test page all-<whichMapping>.html and its reference page
# all-<whichMapping>-ref.html into the current directory.
#
#   $whichMapping  'lower' or 'upper'; used in the file names and as the
#                  prefix of the CSS text-transform value ("<which>case")
#   $hash          ref to a hash: numeric code point => space-separated hex
#                  code points of its mapping
#
# The test page lists every key of %$hash as a character reference inside a
# paragraph styled with the text-transform; the reference page lists the
# mapped characters with no transform. Both pages have one line per code
# point, in ascending order, with the name from %charName as an HTML comment
# where one is known.
#
# Dies if either file cannot be created or written.
sub genTest {
    my ($whichMapping, $hash) = @_;
    my @codepoints = sort { $a <=> $b } keys %$hash;

    # Write one page: shared boilerplate, the given declarations for <p>, and
    # one line per code point rendered by the $renderChar callback.
    my $writePage = sub {
        my ($fileName, $paragraphStyle, $renderChar) = @_;
        open my $out, '>', $fileName
            or die "can't write $fileName: $!\n";
        print {$out} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { $paragraphStyle }
</style>
</head>
<body>
<p>
__END__
        foreach my $key (@codepoints) {
            print {$out} $renderChar->($key);
            print {$out} " <!-- $charName{$key} -->" if exists $charName{$key};
            print {$out} "\n";
        }
        print {$out} <<__END__;
</p>
</body>
</html>
__END__
        # buffered write errors only surface here
        close $out
            or die "error writing $fileName: $!\n";
    };

    # test page: the original characters, transformed by the browser
    $writePage->("all-$whichMapping.html",
                 "font-family: foo; text-transform: ${whichMapping}case;",
                 sub { sprintf("&#x%04X;", $_[0]) });

    # reference page: the expected result, spelled out
    $writePage->("all-$whichMapping-ref.html",
                 "font-family: foo;",
                 sub {
                     join('', map { sprintf("&#x%s;", $_) }
                              split(/ /, $hash->{$_[0]}));
                 });
    return;
}
|
||||
|
||||
# Write the test page all-title.html (text-transform: capitalize) and its
# reference page all-title-ref.html into the current directory.
#
# Every key of %allTitle is listed, in ascending order, followed by a
# lowercase "x". Each line carries the name from %charName as an HTML comment
# where one is known. The reference page spells out the expected rendering
# using %gc, %allUpper and %allTitle (see the comments in the callback).
#
# Dies if either file cannot be created or written.
sub genTitleTest {
    my @codepoints = sort { $a <=> $b } keys %allTitle;

    # Write one page: shared boilerplate, the given declarations for <p>, and
    # one line per code point rendered by the $renderChar callback.
    my $writePage = sub {
        my ($fileName, $paragraphStyle, $renderChar) = @_;
        open my $out, '>', $fileName
            or die "can't write $fileName: $!\n";
        print {$out} <<__END__;
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-type" content="text/html; charset=utf-8">
<style type="text/css">
\@font-face { font-family: foo; src: url($testFont); }
p { $paragraphStyle }
</style>
</head>
<body>
<p>
__END__
        foreach my $key (@codepoints) {
            print {$out} $renderChar->($key);
            print {$out} " <!-- $charName{$key} -->" if exists $charName{$key};
            print {$out} "\n";
        }
        print {$out} <<__END__;
</p>
</body>
</html>
__END__
        # buffered write errors only surface here
        close $out
            or die "error writing $fileName: $!\n";
    };

    # test page: each character followed by "x", capitalized by the browser
    $writePage->('all-title.html',
                 'font-family: foo; text-transform: capitalize;',
                 sub { sprintf("&#x%04X;x", $_[0]) });

    # reference page: the expected result, spelled out
    $writePage->('all-title-ref.html',
                 'font-family: foo;',
                 sub {
                     my ($key) = @_;
                     # capitalize is only applied to characters with GC=L* or N*...
                     # (for any other character the reference shows the
                     # following letter capitalized instead)
                     return sprintf("&#x%04X;X", $key)
                         unless $gc{$key} =~ /^[LN]/;
                     # ...and those that are already uppercase are not transformed
                     my $first = exists $allUpper{$key}
                         ? join('', map { sprintf("&#x%s;", $_) }
                                    split(/ /, $allTitle{$key}))
                         : sprintf("&#x%04X;", $key);
                     return $first . "x";
                 });
    return;
}
|
@ -62,6 +62,7 @@ SDK_HEADERS = \
|
||||
|
||||
EXPORTS = \
|
||||
nsBidiUtils.h \
|
||||
nsSpecialCasingData.h \
|
||||
nsUnicodeProperties.h \
|
||||
nsUnicodeScriptCodes.h \
|
||||
$(NULL)
|
||||
@ -69,6 +70,7 @@ EXPORTS = \
|
||||
CPPSRCS = \
|
||||
nsUnicharUtils.cpp \
|
||||
nsBidiUtils.cpp \
|
||||
nsSpecialCasingData.cpp \
|
||||
nsUnicodeProperties.cpp \
|
||||
$(NULL)
|
||||
|
||||
|
202
intl/unicharutil/util/nsSpecialCasingData.cpp
Normal file
202
intl/unicharutil/util/nsSpecialCasingData.cpp
Normal file
@ -0,0 +1,202 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
/* Auto-generated from files in the Unicode Character Database
|
||||
by genSpecialCasingData.pl - do not edit! */
|
||||
|
||||
#include "nsSpecialCasingData.h"
|
||||
#include "mozilla/Util.h" // for ArrayLength
|
||||
#include <stdlib.h> // for bsearch
|
||||
|
||||
/* SpecialCasing-6.1.0.txt */
|
||||
/* Date: 2011-11-27, 05:10:51 GMT [MD] */
|
||||
|
||||
using mozilla::unicode::MultiCharMapping;
|
||||
|
||||
static const MultiCharMapping CaseSpecials_Lower[] = {
|
||||
{ 0x0130, {0x0069, 0x0307, 0x0000} }, // LATIN CAPITAL LETTER I WITH DOT ABOVE
|
||||
};
|
||||
|
||||
static const MultiCharMapping CaseSpecials_Upper[] = {
|
||||
{ 0x00df, {0x0053, 0x0053, 0x0000} }, // LATIN SMALL LETTER SHARP S
|
||||
{ 0x0149, {0x02bc, 0x004e, 0x0000} }, // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
{ 0x01f0, {0x004a, 0x030c, 0x0000} }, // LATIN SMALL LETTER J WITH CARON
|
||||
{ 0x0390, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
{ 0x03b0, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
{ 0x0587, {0x0535, 0x0552, 0x0000} }, // ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
{ 0x1e96, {0x0048, 0x0331, 0x0000} }, // LATIN SMALL LETTER H WITH LINE BELOW
|
||||
{ 0x1e97, {0x0054, 0x0308, 0x0000} }, // LATIN SMALL LETTER T WITH DIAERESIS
|
||||
{ 0x1e98, {0x0057, 0x030a, 0x0000} }, // LATIN SMALL LETTER W WITH RING ABOVE
|
||||
{ 0x1e99, {0x0059, 0x030a, 0x0000} }, // LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
{ 0x1e9a, {0x0041, 0x02be, 0x0000} }, // LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
{ 0x1f50, {0x03a5, 0x0313, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
{ 0x1f52, {0x03a5, 0x0313, 0x0300} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
{ 0x1f54, {0x03a5, 0x0313, 0x0301} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
{ 0x1f56, {0x03a5, 0x0313, 0x0342} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
{ 0x1f80, {0x1f08, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
|
||||
{ 0x1f81, {0x1f09, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
|
||||
{ 0x1f82, {0x1f0a, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1f83, {0x1f0b, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1f84, {0x1f0c, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1f85, {0x1f0d, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1f86, {0x1f0e, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1f87, {0x1f0f, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1f88, {0x1f08, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
|
||||
{ 0x1f89, {0x1f09, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
|
||||
{ 0x1f8a, {0x1f0a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1f8b, {0x1f0b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1f8c, {0x1f0c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1f8d, {0x1f0d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1f8e, {0x1f0e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1f8f, {0x1f0f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1f90, {0x1f28, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
|
||||
{ 0x1f91, {0x1f29, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
|
||||
{ 0x1f92, {0x1f2a, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1f93, {0x1f2b, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1f94, {0x1f2c, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1f95, {0x1f2d, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1f96, {0x1f2e, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1f97, {0x1f2f, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1f98, {0x1f28, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
|
||||
{ 0x1f99, {0x1f29, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
|
||||
{ 0x1f9a, {0x1f2a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1f9b, {0x1f2b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1f9c, {0x1f2c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1f9d, {0x1f2d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1f9e, {0x1f2e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1f9f, {0x1f2f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1fa0, {0x1f68, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
|
||||
{ 0x1fa1, {0x1f69, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
|
||||
{ 0x1fa2, {0x1f6a, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fa3, {0x1f6b, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fa4, {0x1f6c, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fa5, {0x1f6d, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fa6, {0x1f6e, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fa7, {0x1f6f, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fa8, {0x1f68, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
|
||||
{ 0x1fa9, {0x1f69, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
|
||||
{ 0x1faa, {0x1f6a, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1fab, {0x1f6b, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI
|
||||
{ 0x1fac, {0x1f6c, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1fad, {0x1f6d, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI
|
||||
{ 0x1fae, {0x1f6e, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1faf, {0x1f6f, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI
|
||||
{ 0x1fb2, {0x1fba, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fb3, {0x0391, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
|
||||
{ 0x1fb4, {0x0386, 0x0399, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fb6, {0x0391, 0x0342, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
{ 0x1fb7, {0x0391, 0x0342, 0x0399} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fbc, {0x0391, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
|
||||
{ 0x1fc2, {0x1fca, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fc3, {0x0397, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
|
||||
{ 0x1fc4, {0x0389, 0x0399, 0x0000} }, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fc6, {0x0397, 0x0342, 0x0000} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
{ 0x1fc7, {0x0397, 0x0342, 0x0399} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fcc, {0x0397, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
|
||||
{ 0x1fd2, {0x03aa, 0x0300, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
{ 0x1fd3, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
{ 0x1fd6, {0x0399, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
{ 0x1fd7, {0x03aa, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
{ 0x1fe2, {0x03ab, 0x0300, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
{ 0x1fe3, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
{ 0x1fe4, {0x03a1, 0x0313, 0x0000} }, // GREEK SMALL LETTER RHO WITH PSILI
|
||||
{ 0x1fe6, {0x03a5, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
{ 0x1fe7, {0x03ab, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
{ 0x1ff2, {0x1ffa, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1ff3, {0x03a9, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
|
||||
{ 0x1ff4, {0x038f, 0x0399, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1ff6, {0x03a9, 0x0342, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
{ 0x1ff7, {0x03a9, 0x0342, 0x0399} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1ffc, {0x03a9, 0x0399, 0x0000} }, // GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
|
||||
{ 0xfb00, {0x0046, 0x0046, 0x0000} }, // LATIN SMALL LIGATURE FF
|
||||
{ 0xfb01, {0x0046, 0x0049, 0x0000} }, // LATIN SMALL LIGATURE FI
|
||||
{ 0xfb02, {0x0046, 0x004c, 0x0000} }, // LATIN SMALL LIGATURE FL
|
||||
{ 0xfb03, {0x0046, 0x0046, 0x0049} }, // LATIN SMALL LIGATURE FFI
|
||||
{ 0xfb04, {0x0046, 0x0046, 0x004c} }, // LATIN SMALL LIGATURE FFL
|
||||
{ 0xfb05, {0x0053, 0x0054, 0x0000} }, // LATIN SMALL LIGATURE LONG S T
|
||||
{ 0xfb06, {0x0053, 0x0054, 0x0000} }, // LATIN SMALL LIGATURE ST
|
||||
{ 0xfb13, {0x0544, 0x0546, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN NOW
|
||||
{ 0xfb14, {0x0544, 0x0535, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN ECH
|
||||
{ 0xfb15, {0x0544, 0x053b, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN INI
|
||||
{ 0xfb16, {0x054e, 0x0546, 0x0000} }, // ARMENIAN SMALL LIGATURE VEW NOW
|
||||
{ 0xfb17, {0x0544, 0x053d, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN XEH
|
||||
};
|
||||
|
||||
static const MultiCharMapping CaseSpecials_Title[] = {
|
||||
{ 0x00df, {0x0053, 0x0073, 0x0000} }, // LATIN SMALL LETTER SHARP S
|
||||
{ 0x0149, {0x02bc, 0x004e, 0x0000} }, // LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
|
||||
{ 0x01f0, {0x004a, 0x030c, 0x0000} }, // LATIN SMALL LETTER J WITH CARON
|
||||
{ 0x0390, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
|
||||
{ 0x03b0, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
|
||||
{ 0x0587, {0x0535, 0x0582, 0x0000} }, // ARMENIAN SMALL LIGATURE ECH YIWN
|
||||
{ 0x1e96, {0x0048, 0x0331, 0x0000} }, // LATIN SMALL LETTER H WITH LINE BELOW
|
||||
{ 0x1e97, {0x0054, 0x0308, 0x0000} }, // LATIN SMALL LETTER T WITH DIAERESIS
|
||||
{ 0x1e98, {0x0057, 0x030a, 0x0000} }, // LATIN SMALL LETTER W WITH RING ABOVE
|
||||
{ 0x1e99, {0x0059, 0x030a, 0x0000} }, // LATIN SMALL LETTER Y WITH RING ABOVE
|
||||
{ 0x1e9a, {0x0041, 0x02be, 0x0000} }, // LATIN SMALL LETTER A WITH RIGHT HALF RING
|
||||
{ 0x1f50, {0x03a5, 0x0313, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PSILI
|
||||
{ 0x1f52, {0x03a5, 0x0313, 0x0300} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
|
||||
{ 0x1f54, {0x03a5, 0x0313, 0x0301} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
|
||||
{ 0x1f56, {0x03a5, 0x0313, 0x0342} }, // GREEK SMALL LETTER UPSILON WITH PSILI AND PERISPOMENI
|
||||
{ 0x1fb2, {0x1fba, 0x0345, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fb4, {0x0386, 0x0345, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fb6, {0x0391, 0x0342, 0x0000} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI
|
||||
{ 0x1fb7, {0x0391, 0x0342, 0x0345} }, // GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fc2, {0x1fca, 0x0345, 0x0000} }, // GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1fc4, {0x0389, 0x0345, 0x0000} }, // GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1fc6, {0x0397, 0x0342, 0x0000} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI
|
||||
{ 0x1fc7, {0x0397, 0x0342, 0x0345} }, // GREEK SMALL LETTER ETA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0x1fd2, {0x03aa, 0x0300, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
|
||||
{ 0x1fd3, {0x03aa, 0x0301, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
|
||||
{ 0x1fd6, {0x0399, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH PERISPOMENI
|
||||
{ 0x1fd7, {0x03aa, 0x0342, 0x0000} }, // GREEK SMALL LETTER IOTA WITH DIALYTIKA AND PERISPOMENI
|
||||
{ 0x1fe2, {0x03ab, 0x0300, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
|
||||
{ 0x1fe3, {0x03ab, 0x0301, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
|
||||
{ 0x1fe4, {0x03a1, 0x0313, 0x0000} }, // GREEK SMALL LETTER RHO WITH PSILI
|
||||
{ 0x1fe6, {0x03a5, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH PERISPOMENI
|
||||
{ 0x1fe7, {0x03ab, 0x0342, 0x0000} }, // GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND PERISPOMENI
|
||||
{ 0x1ff2, {0x1ffa, 0x0345, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
|
||||
{ 0x1ff4, {0x038f, 0x0345, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
|
||||
{ 0x1ff6, {0x03a9, 0x0342, 0x0000} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI
|
||||
{ 0x1ff7, {0x03a9, 0x0342, 0x0345} }, // GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND YPOGEGRAMMENI
|
||||
{ 0xfb00, {0x0046, 0x0066, 0x0000} }, // LATIN SMALL LIGATURE FF
|
||||
{ 0xfb01, {0x0046, 0x0069, 0x0000} }, // LATIN SMALL LIGATURE FI
|
||||
{ 0xfb02, {0x0046, 0x006c, 0x0000} }, // LATIN SMALL LIGATURE FL
|
||||
{ 0xfb03, {0x0046, 0x0066, 0x0069} }, // LATIN SMALL LIGATURE FFI
|
||||
{ 0xfb04, {0x0046, 0x0066, 0x006c} }, // LATIN SMALL LIGATURE FFL
|
||||
{ 0xfb05, {0x0053, 0x0074, 0x0000} }, // LATIN SMALL LIGATURE LONG S T
|
||||
{ 0xfb06, {0x0053, 0x0074, 0x0000} }, // LATIN SMALL LIGATURE ST
|
||||
{ 0xfb13, {0x0544, 0x0576, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN NOW
|
||||
{ 0xfb14, {0x0544, 0x0565, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN ECH
|
||||
{ 0xfb15, {0x0544, 0x056b, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN INI
|
||||
{ 0xfb16, {0x054e, 0x0576, 0x0000} }, // ARMENIAN SMALL LIGATURE VEW NOW
|
||||
{ 0xfb17, {0x0544, 0x056d, 0x0000} }, // ARMENIAN SMALL LIGATURE MEN XEH
|
||||
};
|
||||
|
||||
static int CompareMCM(const void* aKey, const void* aElement)
|
||||
{
|
||||
const PRUint32 ch = *static_cast<const PRUint32*>(aKey);
|
||||
const MultiCharMapping* mcm = static_cast<const MultiCharMapping*>(aElement);
|
||||
return int(ch) - int(mcm->mOriginalChar);
|
||||
}
|
||||
|
||||
#define MAKE_SPECIAL_CASE_ACCESSOR(which) \
|
||||
const MultiCharMapping* \
|
||||
Special##which(PRUint32 aChar) \
|
||||
{ \
|
||||
const void* p = bsearch(&aChar, CaseSpecials_##which, \
|
||||
mozilla::ArrayLength(CaseSpecials_##which), \
|
||||
sizeof(MultiCharMapping), CompareMCM); \
|
||||
return static_cast<const MultiCharMapping*>(p); \
|
||||
}
|
||||
|
||||
namespace mozilla {
|
||||
namespace unicode {
|
||||
|
||||
MAKE_SPECIAL_CASE_ACCESSOR(Lower)
|
||||
MAKE_SPECIAL_CASE_ACCESSOR(Upper)
|
||||
MAKE_SPECIAL_CASE_ACCESSOR(Title)
|
||||
|
||||
} // namespace unicode
|
||||
} // namespace mozilla
|
26
intl/unicharutil/util/nsSpecialCasingData.h
Normal file
26
intl/unicharutil/util/nsSpecialCasingData.h
Normal file
@ -0,0 +1,26 @@
|
||||
/* This Source Code Form is subject to the terms of the Mozilla Public
|
||||
* License, v. 2.0. If a copy of the MPL was not distributed with this file,
|
||||
* You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||
|
||||
// Include guard: without it, a translation unit that pulls this header in
// twice (directly or through another header) redefines MultiCharMapping.
#ifndef NS_SPECIALCASINGDATA_H
#define NS_SPECIALCASINGDATA_H

#include "prtypes.h"

namespace mozilla {
namespace unicode {

// Multi-character mappings (from SpecialCasing.txt) map a single Unicode
// value to a sequence of 2 or 3 Unicode characters. There are currently none
// defined outside the BMP, so we can use PRUnichar here. Unused trailing
// positions in mMappedChars are set to 0.
struct MultiCharMapping {
  PRUnichar mOriginalChar;    // the character being case-mapped
  PRUnichar mMappedChars[3];  // its replacement sequence, 0-padded
};

// Return a pointer to the special case mapping for the given character;
// returns NULL if no such mapping is defined.
const MultiCharMapping* SpecialUpper(PRUint32 aCh);
const MultiCharMapping* SpecialLower(PRUint32 aCh);
const MultiCharMapping* SpecialTitle(PRUint32 aCh);

} // namespace unicode
} // namespace mozilla

#endif /* NS_SPECIALCASINGDATA_H */
|
@ -37,6 +37,7 @@
|
||||
INTL_UNICHARUTIL_UTIL_LCPPSRCS = \
|
||||
nsUnicharUtils.cpp \
|
||||
nsBidiUtils.cpp \
|
||||
nsSpecialCasingData.cpp \
|
||||
nsUnicodeProperties.cpp \
|
||||
$(NULL)
|
||||
|
||||
|
@ -47,8 +47,7 @@
|
||||
#include "nsContentUtils.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
#include "nsUnicodeProperties.h"
|
||||
|
||||
#define SZLIG 0x00DF
|
||||
#include "nsSpecialCasingData.h"
|
||||
|
||||
// Unicode characters needing special casing treatment in tr/az languages
|
||||
#define LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE 0x0130
|
||||
@ -158,11 +157,18 @@ nsTransformingTextRunFactory::MakeTextRun(const PRUint8* aString, PRUint32 aLeng
|
||||
* are identical.
|
||||
*
|
||||
* This is used for text-transform:uppercase when we encounter a SZLIG,
|
||||
* whose uppercase form is "SS".
|
||||
* whose uppercase form is "SS", or other ligature or precomposed form
|
||||
* that expands to multiple codepoints during case transformation.
|
||||
*
|
||||
* This function is unable to merge characters when they occur in different
|
||||
* glyph runs. It's hard to see how this could happen, but if it does, we just
|
||||
* discard the characters-to-merge.
|
||||
* glyph runs. This only happens in tricky edge cases where a character was
|
||||
* decomposed by case-mapping (e.g. there's no precomposed uppercase version
|
||||
* of an accented lowercase letter), and then font-matching caused the
|
||||
* diacritics to be assigned to a different font than the base character.
|
||||
* In this situation, the diacritic(s) get discarded, which is less than
|
||||
* ideal, but they probably weren't going to render very well anyway.
|
||||
* Bug 543200 will improve this by making font-matching operate on entire
|
||||
* clusters instead of individual codepoints.
|
||||
*
|
||||
* For simplicity, this produces a textrun containing all DetailedGlyphs,
|
||||
* no simple glyphs. So don't call it unless you really have merging to do.
|
||||
@ -188,9 +194,11 @@ MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
|
||||
|
||||
bool anyMissing = false;
|
||||
PRUint32 mergeRunStart = iter.GetStringStart();
|
||||
PRUint32 k;
|
||||
for (k = iter.GetStringStart(); k < iter.GetStringEnd(); ++k) {
|
||||
const gfxTextRun::CompressedGlyph g = aSrc->GetCharacterGlyphs()[k];
|
||||
const gfxTextRun::CompressedGlyph *srcGlyphs = aSrc->GetCharacterGlyphs();
|
||||
gfxTextRun::CompressedGlyph mergedGlyph = srcGlyphs[mergeRunStart];
|
||||
PRUint32 stringEnd = iter.GetStringEnd();
|
||||
for (PRUint32 k = iter.GetStringStart(); k < stringEnd; ++k) {
|
||||
const gfxTextRun::CompressedGlyph g = srcGlyphs[k];
|
||||
if (g.IsSimpleGlyph()) {
|
||||
if (!anyMissing) {
|
||||
gfxTextRun::DetailedGlyph details;
|
||||
@ -210,40 +218,39 @@ MergeCharactersInTextRun(gfxTextRun* aDest, gfxTextRun* aSrc,
|
||||
}
|
||||
}
|
||||
|
||||
// We could teach this method to handle merging of characters that aren't
|
||||
// cluster starts or ligature group starts, but this is really only used
|
||||
// to merge S's (uppercase ß), so it's not worth it.
|
||||
|
||||
if (k + 1 < iter.GetStringEnd() && aCharsToMerge[k + 1]) {
|
||||
NS_ASSERTION(g.IsClusterStart() && g.IsLigatureGroupStart(),
|
||||
"Don't know how to merge this stuff");
|
||||
// next char is supposed to merge with current, so loop without
|
||||
// writing current merged glyph to the destination
|
||||
continue;
|
||||
}
|
||||
|
||||
NS_ASSERTION(mergeRunStart == k ||
|
||||
(g.IsClusterStart() && g.IsLigatureGroupStart()),
|
||||
"Don't know how to merge this stuff");
|
||||
|
||||
// If the start of the merge run is actually a character that should
|
||||
// have been merged with the previous character (this can happen
|
||||
// if there's a font change in the middle of a szlig, for example),
|
||||
// if there's a font change in the middle of a case-mapped character,
|
||||
// that decomposed into a sequence of base+diacritics, for example),
|
||||
// just discard the entire merge run. See comment at start of this
|
||||
// function.
|
||||
NS_WARN_IF_FALSE(!aCharsToMerge[mergeRunStart],
|
||||
"unable to merge across a glyph run boundary, "
|
||||
"glyph(s) discarded");
|
||||
if (!aCharsToMerge[mergeRunStart]) {
|
||||
gfxTextRun::CompressedGlyph mergedGlyphs =
|
||||
aSrc->GetCharacterGlyphs()[mergeRunStart];
|
||||
if (anyMissing) {
|
||||
mergedGlyphs.SetMissing(glyphs.Length());
|
||||
mergedGlyph.SetMissing(glyphs.Length());
|
||||
} else {
|
||||
mergedGlyphs.SetComplex(true, true, glyphs.Length());
|
||||
mergedGlyph.SetComplex(mergedGlyph.IsClusterStart(),
|
||||
mergedGlyph.IsLigatureGroupStart(),
|
||||
glyphs.Length());
|
||||
}
|
||||
aDest->SetGlyphs(offset, mergedGlyphs, glyphs.Elements());
|
||||
aDest->SetGlyphs(offset, mergedGlyph, glyphs.Elements());
|
||||
++offset;
|
||||
}
|
||||
|
||||
glyphs.Clear();
|
||||
anyMissing = false;
|
||||
mergeRunStart = k + 1;
|
||||
if (mergeRunStart < stringEnd) {
|
||||
mergedGlyph = srcGlyphs[mergeRunStart];
|
||||
}
|
||||
}
|
||||
NS_ASSERTION(glyphs.Length() == 0,
|
||||
"Leftover glyphs, don't request merging of the last character with its next!");
|
||||
@ -310,7 +317,7 @@ nsFontVariantTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
|
||||
}
|
||||
PRUint32 ch2 = ToUpperCase(ch);
|
||||
isLowercase = ch != ch2 || ch == SZLIG;
|
||||
isLowercase = ch != ch2 || mozilla::unicode::SpecialUpper(ch);
|
||||
} else {
|
||||
// Don't transform the character! I.e., pretend that it's not lowercase
|
||||
}
|
||||
@ -399,7 +406,8 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
|
||||
PRUint8 style = mAllUppercase ? NS_STYLE_TEXT_TRANSFORM_UPPERCASE
|
||||
: styleContext->GetStyleText()->mTextTransform;
|
||||
bool extraChar = false;
|
||||
int extraChars = 0;
|
||||
const mozilla::unicode::MultiCharMapping *mcm;
|
||||
|
||||
if (NS_IS_HIGH_SURROGATE(ch) && i < length - 1 && NS_IS_LOW_SURROGATE(str[i + 1])) {
|
||||
ch = SURROGATE_TO_UCS4(ch, str[i + 1]);
|
||||
@ -420,11 +428,19 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
|
||||
switch (style) {
|
||||
case NS_STYLE_TEXT_TRANSFORM_LOWERCASE:
|
||||
if (languageSpecificCasing == eTurkish && ch == 'I') {
|
||||
ch = LATIN_SMALL_LETTER_DOTLESS_I;
|
||||
prevIsLetter = true;
|
||||
sigmaIndex = PRUint32(-1);
|
||||
break;
|
||||
if (languageSpecificCasing == eTurkish) {
|
||||
if (ch == 'I') {
|
||||
ch = LATIN_SMALL_LETTER_DOTLESS_I;
|
||||
prevIsLetter = true;
|
||||
sigmaIndex = PRUint32(-1);
|
||||
break;
|
||||
}
|
||||
if (ch == LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE) {
|
||||
ch = 'i';
|
||||
prevIsLetter = true;
|
||||
sigmaIndex = PRUint32(-1);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Special lowercasing behavior for Greek Sigma: note that this is listed
|
||||
@ -473,8 +489,6 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
break;
|
||||
}
|
||||
|
||||
ch = ToLowerCase(ch);
|
||||
|
||||
// ignore diacritics for the purpose of contextual sigma mapping;
|
||||
// otherwise, reset prevIsLetter appropriately and clear the
|
||||
// sigmaIndex marker
|
||||
@ -482,19 +496,40 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
prevIsLetter = (cat == nsIUGenCategory::kLetter);
|
||||
sigmaIndex = PRUint32(-1);
|
||||
}
|
||||
|
||||
mcm = mozilla::unicode::SpecialLower(ch);
|
||||
if (mcm) {
|
||||
int j = 0;
|
||||
while (j < 2 && mcm->mMappedChars[j + 1]) {
|
||||
convertedString.Append(mcm->mMappedChars[j]);
|
||||
++extraChars;
|
||||
++j;
|
||||
}
|
||||
ch = mcm->mMappedChars[j];
|
||||
break;
|
||||
}
|
||||
|
||||
ch = ToLowerCase(ch);
|
||||
break;
|
||||
|
||||
case NS_STYLE_TEXT_TRANSFORM_UPPERCASE:
|
||||
if (ch == SZLIG) {
|
||||
convertedString.Append('S');
|
||||
extraChar = true;
|
||||
ch = 'S';
|
||||
break;
|
||||
}
|
||||
if (languageSpecificCasing == eTurkish && ch == 'i') {
|
||||
ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
|
||||
break;
|
||||
}
|
||||
|
||||
mcm = mozilla::unicode::SpecialUpper(ch);
|
||||
if (mcm) {
|
||||
int j = 0;
|
||||
while (j < 2 && mcm->mMappedChars[j + 1]) {
|
||||
convertedString.Append(mcm->mMappedChars[j]);
|
||||
++extraChars;
|
||||
++j;
|
||||
}
|
||||
ch = mcm->mMappedChars[j];
|
||||
break;
|
||||
}
|
||||
|
||||
ch = ToUpperCase(ch);
|
||||
break;
|
||||
|
||||
@ -506,12 +541,6 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
}
|
||||
capitalizeDutchIJ = false;
|
||||
if (i < aTextRun->mCapitalize.Length() && aTextRun->mCapitalize[i]) {
|
||||
if (ch == SZLIG) {
|
||||
convertedString.Append('S');
|
||||
extraChar = true;
|
||||
ch = 'S';
|
||||
break;
|
||||
}
|
||||
if (languageSpecificCasing == eTurkish && ch == 'i') {
|
||||
ch = LATIN_CAPITAL_LETTER_I_WITH_DOT_ABOVE;
|
||||
break;
|
||||
@ -521,6 +550,19 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
capitalizeDutchIJ = true;
|
||||
break;
|
||||
}
|
||||
|
||||
mcm = mozilla::unicode::SpecialTitle(ch);
|
||||
if (mcm) {
|
||||
int j = 0;
|
||||
while (j < 2 && mcm->mMappedChars[j + 1]) {
|
||||
convertedString.Append(mcm->mMappedChars[j]);
|
||||
++extraChars;
|
||||
++j;
|
||||
}
|
||||
ch = mcm->mMappedChars[j];
|
||||
break;
|
||||
}
|
||||
|
||||
ch = ToTitleCase(ch);
|
||||
}
|
||||
break;
|
||||
@ -540,11 +582,12 @@ nsCaseTransformTextRunFactory::RebuildTextRun(nsTransformedTextRun* aTextRun,
|
||||
canBreakBeforeArray.AppendElement(false);
|
||||
}
|
||||
|
||||
if (extraChar) {
|
||||
while (extraChars > 0) {
|
||||
++extraCharsCount;
|
||||
charsToMergeArray.AppendElement(true);
|
||||
styleArray.AppendElement(styleContext);
|
||||
canBreakBeforeArray.AppendElement(false);
|
||||
--extraChars;
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user