2011-05-04 11:29:45 +00:00
|
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
|
|
*
|
|
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
|
|
* the License. You may obtain a copy of the License at
|
|
|
|
* http://www.mozilla.org/MPL/
|
|
|
|
*
|
|
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
|
|
* for the specific language governing rights and limitations under the
|
|
|
|
* License.
|
|
|
|
*
|
|
|
|
* The Original Code is Mozilla Hyphenation Service.
|
|
|
|
*
|
|
|
|
* The Initial Developer of the Original Code is
|
|
|
|
* Mozilla Foundation.
|
|
|
|
* Portions created by the Initial Developer are Copyright (C) 2011
|
|
|
|
* the Initial Developer. All Rights Reserved.
|
|
|
|
*
|
|
|
|
* Contributor(s):
|
|
|
|
* Jonathan Kew <jfkthame@gmail.com>
|
|
|
|
*
|
|
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
|
|
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
|
|
|
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
|
|
* the provisions above, a recipient may use your version of this file under
|
|
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
|
|
*
|
|
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
|
|
|
|
#include "nsHyphenator.h"
|
|
|
|
#include "nsIFile.h"
|
|
|
|
#include "nsUTF8Utils.h"
|
2012-02-24 10:15:46 +00:00
|
|
|
#include "nsUnicodeProperties.h"
|
2011-05-04 11:29:45 +00:00
|
|
|
#include "nsUnicharUtilCIID.h"
|
2011-10-06 15:06:32 +00:00
|
|
|
#include "nsIURI.h"
|
2011-05-04 11:29:45 +00:00
|
|
|
|
|
|
|
#include "hyphen.h"
|
|
|
|
|
2011-10-06 15:06:32 +00:00
|
|
|
// Build a hyphenator from the pattern resource identified by aURI.
//
// The URI's spec string is handed to hnj_hyphen_load(); whatever that
// returns is stored in mDict.  If the spec cannot be obtained, mDict stays
// null, which is the state IsValid() reports as "not usable".
nsHyphenator::nsHyphenator(nsIURI *aURI)
  : mDict(nullptr)
{
  nsCString spec;
  if (NS_SUCCEEDED(aURI->GetSpec(spec))) {
    mDict = hnj_hyphen_load(spec.get());
#ifdef DEBUG
    // Debug builds announce each dictionary that loaded successfully.
    if (mDict) {
      printf("loaded hyphenation patterns from %s\n", spec.get());
    }
#endif
  }
}
|
|
|
|
|
|
|
|
// Release the dictionary handle, if one was loaded, via hnj_hyphen_free().
nsHyphenator::~nsHyphenator()
{
  if (!mDict) {
    // Nothing was loaded, so there is nothing to hand back.
    return;
  }
  hnj_hyphen_free(static_cast<HyphenDict*>(mDict));
  mDict = nullptr;
}
|
|
|
|
|
2011-09-29 06:19:26 +00:00
|
|
|
bool
|
2011-05-04 11:29:45 +00:00
|
|
|
nsHyphenator::IsValid()
|
|
|
|
{
|
2012-07-30 14:20:58 +00:00
|
|
|
return (mDict != nullptr);
|
2011-05-04 11:29:45 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Mark hyphenation positions for every word in aString.
//
// aString  - UTF-16 text to scan.  A "word" is a maximal run of characters
//            whose general category is kLetter or kMark; anything else
//            ends the current word.
// aHyphens - resized to aString.Length() and cleared to false.  For each
//            character of a word whose libhyphen result has its low bit
//            set, the entry at the UTF-16 offset of that character's first
//            code unit is set to true.
//
// Returns NS_ERROR_OUT_OF_MEMORY if aHyphens cannot be resized, NS_OK
// otherwise.  A non-zero result from hnj_hyphen_hyphenate2 for a word is
// not reported; that word's entries simply stay false.
nsresult
nsHyphenator::Hyphenate(const nsAString& aString,
                        nsTArray<bool>& aHyphens)
{
  if (!aHyphens.SetLength(aString.Length())) {
    return NS_ERROR_OUT_OF_MEMORY;
  }
  // memset counts bytes, not elements, so scale by the element size rather
  // than relying on sizeof(bool) being 1.
  memset(aHyphens.Elements(), false, aHyphens.Length() * sizeof(bool));

  const uint32_t length = aString.Length();
  bool inWord = false;
  uint32_t wordStart = 0, wordLimit = 0;
  uint32_t chLen;
  for (uint32_t i = 0; i < length; i += chLen) {
    uint32_t ch = aString[i];
    chLen = 1;

    // Combine a surrogate pair into one code point so that it is
    // classified (and stepped over) as a single character.
    if (NS_IS_HIGH_SURROGATE(ch)) {
      if (i + 1 < length && NS_IS_LOW_SURROGATE(aString[i+1])) {
        ch = SURROGATE_TO_UCS4(ch, aString[i+1]);
        chLen = 2;
      } else {
        NS_WARNING("unpaired surrogate found during hyphenation");
      }
    }

    nsIUGenCategory::nsUGenCategory cat = mozilla::unicode::GetGenCategory(ch);
    if (cat == nsIUGenCategory::kLetter || cat == nsIUGenCategory::kMark) {
      if (!inWord) {
        inWord = true;
        wordStart = i;
      }
      wordLimit = i + chLen;
      // More text follows: keep extending the word.  At the very end of
      // the string we fall through instead, so the final word is still
      // processed.
      if (i + chLen < length) {
        continue;
      }
    }

    if (inWord) {
      // libhyphen works on UTF-8, so convert just the current word.
      const PRUnichar *begin = aString.BeginReading();
      NS_ConvertUTF16toUTF8 utf8(begin + wordStart,
                                 wordLimit - wordStart);
      // The result buffer is sized 5 bytes beyond the word length.
      // NOTE(review): presumably the word_size + 5 that libhyphen's
      // documentation asks for -- confirm.
      nsAutoTArray<char,200> utf8hyphens;
      utf8hyphens.SetLength(utf8.Length() + 5);
      // NOTE(review): rep/pos/cut are passed as pointers to null and are
      // never released here.  libhyphen's documentation indicates it may
      // allocate them for dictionaries with non-standard hyphenation
      // rules -- confirm whether they need to be freed after the call.
      char **rep = nullptr;
      int *pos = nullptr;
      int *cut = nullptr;
      int err = hnj_hyphen_hyphenate2(static_cast<HyphenDict*>(mDict),
                                      utf8.BeginReading(), utf8.Length(),
                                      utf8hyphens.Elements(), nullptr,
                                      &rep, &pos, &cut);
      if (!err) {
        // Surprisingly, hnj_hyphen_hyphenate2 converts the 'hyphens' buffer
        // from utf8 code unit indexing (which would match the utf8 input
        // string directly) to Unicode character indexing.
        // We then need to convert this to utf16 code unit offsets for Gecko.
        const char *hyphPtr = utf8hyphens.Elements();
        const PRUnichar *cur = begin + wordStart;
        const PRUnichar *end = begin + wordLimit;
        while (cur < end) {
          if (*hyphPtr & 0x01) {
            aHyphens[cur - begin] = true;
          }
          cur++;
          // Step over the trailing half of a surrogate pair so that 'cur'
          // advances one character per 'hyphPtr' entry.
          if (cur < end && NS_IS_LOW_SURROGATE(*cur) &&
              NS_IS_HIGH_SURROGATE(*(cur-1)))
          {
            cur++;
          }
          hyphPtr++;
        }
      }
    }

    inWord = false;
  }

  return NS_OK;
}
|