mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-28 15:23:51 +00:00
Backed out changeset 25c7efa29d66 (bug 1318955) for breaking Hungarian spellchecking.
--HG-- extra : rebase_source : 8267bc48037eaf64d26ff93d60cd7998cb26d0ac
This commit is contained in:
parent
d570415e50
commit
906fc553b2
@ -4,7 +4,7 @@
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
UNIFIED_SOURCES += [
|
||||
SOURCES += [
|
||||
'mozHunspell.cpp',
|
||||
'mozHunspellDirProvider.cpp',
|
||||
'RemoteSpellCheckEngineChild.cpp',
|
||||
|
@ -200,7 +200,7 @@ NS_IMETHODIMP mozHunspell::SetDictionary(const char16_t *aDictionary)
|
||||
if (!mHunspell)
|
||||
return NS_ERROR_OUT_OF_MEMORY;
|
||||
|
||||
nsAutoCString label(mHunspell->get_dict_encoding().c_str());
|
||||
nsDependentCString label(mHunspell->get_dic_encoding());
|
||||
nsAutoCString encoding;
|
||||
if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
|
||||
return NS_ERROR_UCONV_NOCONV;
|
||||
@ -480,8 +480,7 @@ mozHunspell::LoadDictionariesFromDir(nsIFile* aDir)
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
nsresult
|
||||
mozHunspell::ConvertCharset(const char16_t* aStr, std::string* aDst)
|
||||
nsresult mozHunspell::ConvertCharset(const char16_t* aStr, char ** aDst)
|
||||
{
|
||||
NS_ENSURE_ARG_POINTER(aDst);
|
||||
NS_ENSURE_TRUE(mEncoder, NS_ERROR_NULL_POINTER);
|
||||
@ -491,13 +490,12 @@ mozHunspell::ConvertCharset(const char16_t* aStr, std::string* aDst)
|
||||
nsresult rv = mEncoder->GetMaxLength(aStr, inLength, &outLength);
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
aDst->resize(outLength);
|
||||
*aDst = (char *) moz_xmalloc(sizeof(char) * (outLength+1));
|
||||
NS_ENSURE_TRUE(*aDst, NS_ERROR_OUT_OF_MEMORY);
|
||||
|
||||
char* dst = &aDst->operator[](0);
|
||||
rv = mEncoder->Convert(aStr, &inLength, dst, &outLength);
|
||||
if (NS_SUCCEEDED(rv)) {
|
||||
aDst->resize(outLength);
|
||||
}
|
||||
rv = mEncoder->Convert(aStr, &inLength, *aDst, &outLength);
|
||||
if (NS_SUCCEEDED(rv))
|
||||
(*aDst)[outLength] = '\0';
|
||||
|
||||
return rv;
|
||||
}
|
||||
@ -520,11 +518,12 @@ NS_IMETHODIMP mozHunspell::Check(const char16_t *aWord, bool *aResult)
|
||||
NS_ENSURE_ARG_POINTER(aResult);
|
||||
NS_ENSURE_TRUE(mHunspell, NS_ERROR_FAILURE);
|
||||
|
||||
std::string charsetWord;
|
||||
nsresult rv = ConvertCharset(aWord, &charsetWord);
|
||||
nsXPIDLCString charsetWord;
|
||||
nsresult rv = ConvertCharset(aWord, getter_Copies(charsetWord));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
*aResult = mHunspell->spell(charsetWord);
|
||||
*aResult = !!mHunspell->spell(charsetWord);
|
||||
|
||||
|
||||
if (!*aResult && mPersonalDictionary)
|
||||
rv = mPersonalDictionary->Check(aWord, mLanguage.get(), aResult);
|
||||
@ -541,12 +540,12 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
|
||||
nsresult rv;
|
||||
*aSuggestionCount = 0;
|
||||
|
||||
std::string charsetWord;
|
||||
rv = ConvertCharset(aWord, &charsetWord);
|
||||
nsXPIDLCString charsetWord;
|
||||
rv = ConvertCharset(aWord, getter_Copies(charsetWord));
|
||||
NS_ENSURE_SUCCESS(rv, rv);
|
||||
|
||||
std::vector<std::string> suggestions = mHunspell->suggest(charsetWord);
|
||||
*aSuggestionCount = static_cast<uint32_t>(suggestions.size());
|
||||
char ** wlst;
|
||||
*aSuggestionCount = mHunspell->suggest(&wlst, charsetWord);
|
||||
|
||||
if (*aSuggestionCount) {
|
||||
*aSuggestions = (char16_t **)moz_xmalloc(*aSuggestionCount * sizeof(char16_t *));
|
||||
@ -554,15 +553,15 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
|
||||
uint32_t index = 0;
|
||||
for (index = 0; index < *aSuggestionCount && NS_SUCCEEDED(rv); ++index) {
|
||||
// Convert the suggestion to utf16
|
||||
int32_t inLength = suggestions[index].size();
|
||||
int32_t inLength = strlen(wlst[index]);
|
||||
int32_t outLength;
|
||||
rv = mDecoder->GetMaxLength(suggestions[index].c_str(), inLength, &outLength);
|
||||
rv = mDecoder->GetMaxLength(wlst[index], inLength, &outLength);
|
||||
if (NS_SUCCEEDED(rv))
|
||||
{
|
||||
(*aSuggestions)[index] = (char16_t *) moz_xmalloc(sizeof(char16_t) * (outLength+1));
|
||||
if ((*aSuggestions)[index])
|
||||
{
|
||||
rv = mDecoder->Convert(suggestions[index].c_str(), &inLength, (*aSuggestions)[index], &outLength);
|
||||
rv = mDecoder->Convert(wlst[index], &inLength, (*aSuggestions)[index], &outLength);
|
||||
if (NS_SUCCEEDED(rv))
|
||||
(*aSuggestions)[index][outLength] = 0;
|
||||
}
|
||||
@ -578,6 +577,7 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
|
||||
rv = NS_ERROR_OUT_OF_MEMORY;
|
||||
}
|
||||
|
||||
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(*aSuggestionCount, wlst);
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
@ -99,7 +99,7 @@ public:
|
||||
void LoadDictionaryList(bool aNotifyChildProcesses);
|
||||
|
||||
// helper method for converting a word to the charset of the dictionary
|
||||
nsresult ConvertCharset(const char16_t* aStr, std::string* aDst);
|
||||
nsresult ConvertCharset(const char16_t* aStr, char ** aDst);
|
||||
|
||||
NS_DECL_NSIMEMORYREPORTER
|
||||
|
||||
|
@ -1,2 +1,2 @@
|
||||
Hunspell Version: 1.5.0
|
||||
Hunspell Version: 1.4.1
|
||||
Additional Patches: See patches directory.
|
||||
|
@ -79,7 +79,33 @@
|
||||
#include "affentry.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
AffEntry::~AffEntry() {
|
||||
// Constructs a prefix entry from the affix record `dp`.
// `pmgr` is stored as this entry's affix manager and the four list-link
// pointers (next, nexteq, nextne, flgnxt) start out as NULL.
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||
// register affix manager
|
||||
: pmyMgr(pmgr),
|
||||
next(NULL),
|
||||
nexteq(NULL),
|
||||
nextne(NULL),
|
||||
flgnxt(NULL) {
|
||||
// set up its initial values
|
||||
aflag = dp->aflag; // flag
|
||||
strip = dp->strip; // string to strip
|
||||
appnd = dp->appnd; // string to append
|
||||
numconds = dp->numconds; // length of the condition
|
||||
opts = dp->opts; // cross product flag
|
||||
// then copy over all of the conditions
|
||||
// With aeLONGCOND set, the first MAXCONDLEN_1 bytes are copied inline and
|
||||
// the conds2 pointer is taken over by plain assignment (not duplicated);
|
||||
// otherwise the whole MAXCONDLEN-byte condition buffer is copied.
|
||||
if (opts & aeLONGCOND) {
|
||||
memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
|
||||
c.l.conds2 = dp->c.l.conds2;
|
||||
} else
|
||||
memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||
// morphcode and contclass are assigned from dp as-is (no copy is made here).
|
||||
morphcode = dp->morphcode;
|
||||
contclass = dp->contclass;
|
||||
contclasslen = dp->contclasslen;
|
||||
}
|
||||
|
||||
PfxEntry::~PfxEntry() {
|
||||
aflag = 0;
|
||||
pmyMgr = NULL;
|
||||
if (opts & aeLONGCOND)
|
||||
free(c.l.conds2);
|
||||
if (morphcode && !(opts & aeALIASM))
|
||||
@ -88,26 +114,17 @@ AffEntry::~AffEntry() {
|
||||
free(contclass);
|
||||
}
|
||||
|
||||
// Constructs a prefix entry that only records its affix manager; the four
// list-link pointers start as NULL and the body sets no other fields.
PfxEntry::PfxEntry(AffixMgr* pmgr)
|
||||
// register affix manager
|
||||
: pmyMgr(pmgr),
|
||||
next(NULL),
|
||||
nexteq(NULL),
|
||||
nextne(NULL),
|
||||
flgnxt(NULL) {
|
||||
}
|
||||
|
||||
// add prefix to this word assuming conditions hold
|
||||
std::string PfxEntry::add(const char* word, size_t len) {
|
||||
std::string result;
|
||||
char* PfxEntry::add(const char* word, size_t len) {
|
||||
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word) &&
|
||||
(!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
|
||||
/* we have a match so add prefix */
|
||||
result.assign(appnd);
|
||||
result.append(word + strip.size());
|
||||
std::string tword(appnd);
|
||||
tword.append(word + strip.size());
|
||||
return mystrdup(tword.c_str());
|
||||
}
|
||||
return result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline char* PfxEntry::nextchar(char* p) {
|
||||
@ -259,7 +276,8 @@ struct hentry* PfxEntry::checkword(const char* word,
|
||||
// if ((opts & aeXPRODUCT) && in_compound) {
|
||||
if ((opts & aeXPRODUCT)) {
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
FLAG_NULL, needflag, in_compound);
|
||||
NULL, 0, NULL, FLAG_NULL, needflag,
|
||||
in_compound);
|
||||
if (he)
|
||||
return he;
|
||||
}
|
||||
@ -273,6 +291,8 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
struct hentry* he; // hash entry of root word or NULL
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
@ -304,9 +324,8 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
|
||||
// cross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
// hash entry of root word or NULL
|
||||
struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
needflag);
|
||||
he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
needflag);
|
||||
if (he)
|
||||
return he;
|
||||
}
|
||||
@ -316,15 +335,15 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
std::string PfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
std::string result;
|
||||
char* PfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
// and if there are enough chars in root word and added back strip chars
|
||||
// to meet the number of characters conditions, then test it
|
||||
|
||||
int tmpl = len - appnd.size(); // length of tmpword
|
||||
|
||||
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
|
||||
@ -351,21 +370,22 @@ std::string PfxEntry::check_twosfx_morph(const char* word,
|
||||
// cross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
|
||||
aeXPRODUCT,
|
||||
this, needflag);
|
||||
return pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
|
||||
aeXPRODUCT,
|
||||
this, needflag);
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// check if this prefix entry matches
|
||||
std::string PfxEntry::check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
std::string result;
|
||||
char* PfxEntry::check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag) {
|
||||
struct hentry* he; // hash entry of root word or NULL
|
||||
char* st;
|
||||
|
||||
// on entry prefix is 0 length or already matches the beginning of the word.
|
||||
// So if the remaining root word has positive length
|
||||
@ -391,8 +411,9 @@ std::string PfxEntry::check_morph(const char* word,
|
||||
// root word in the dictionary
|
||||
|
||||
if (test_condition(tmpword.c_str())) {
|
||||
std::string result;
|
||||
|
||||
tmpl += strip.size();
|
||||
struct hentry* he; // hash entry of root word or NULL
|
||||
if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
|
||||
do {
|
||||
if (TESTAFF(he->astr, aflag, he->alen) &&
|
||||
@ -434,19 +455,23 @@ std::string PfxEntry::check_morph(const char* word,
|
||||
// cross checked combined with a suffix
|
||||
|
||||
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
FLAG_NULL, needflag);
|
||||
if (!st.empty()) {
|
||||
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
|
||||
FLAG_NULL, needflag);
|
||||
if (st) {
|
||||
result.append(st);
|
||||
free(st);
|
||||
}
|
||||
}
|
||||
|
||||
if (!result.empty())
|
||||
return mystrdup(result.c_str());
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
SfxEntry::SfxEntry(AffixMgr* pmgr)
|
||||
SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
|
||||
: pmyMgr(pmgr) // register affix manager
|
||||
,
|
||||
next(NULL),
|
||||
@ -456,21 +481,50 @@ SfxEntry::SfxEntry(AffixMgr* pmgr)
|
||||
l_morph(NULL),
|
||||
r_morph(NULL),
|
||||
eq_morph(NULL) {
|
||||
// set up its initial values
|
||||
aflag = dp->aflag; // char flag
|
||||
strip = dp->strip; // string to strip
|
||||
appnd = dp->appnd; // string to append
|
||||
numconds = dp->numconds; // length of the condition
|
||||
opts = dp->opts; // cross product flag
|
||||
|
||||
// then copy over all of the conditions
|
||||
if (opts & aeLONGCOND) {
|
||||
memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
|
||||
c.l.conds2 = dp->c.l.conds2;
|
||||
} else
|
||||
memcpy(c.conds, dp->c.conds, MAXCONDLEN);
|
||||
rappnd = appnd;
|
||||
reverseword(rappnd);
|
||||
morphcode = dp->morphcode;
|
||||
contclass = dp->contclass;
|
||||
contclasslen = dp->contclasslen;
|
||||
}
|
||||
|
||||
// Destroys a suffix entry: clears the flag and manager pointer, then frees
// the heap buffers this entry holds.
SfxEntry::~SfxEntry() {
|
||||
aflag = 0;
|
||||
pmyMgr = NULL;
|
||||
// conds2 is only freed when the long-condition option bit is set.
|
||||
if (opts & aeLONGCOND)
|
||||
free(c.l.conds2);
|
||||
// morphcode is freed unless aeALIASM is set
|
||||
// NOTE(review): presumably aliased data is owned elsewhere - confirm.
|
||||
if (morphcode && !(opts & aeALIASM))
|
||||
free(morphcode);
|
||||
// contclass is freed unless aeALIASF is set (same assumption as above).
|
||||
if (contclass && !(opts & aeALIASF))
|
||||
free(contclass);
|
||||
}
|
||||
|
||||
// add suffix to this word assuming conditions hold
|
||||
std::string SfxEntry::add(const char* word, size_t len) {
|
||||
std::string result;
|
||||
char* SfxEntry::add(const char* word, size_t len) {
|
||||
/* make sure all conditions match */
|
||||
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
|
||||
(len >= numconds) && test_condition(word + len, word) &&
|
||||
(!strip.size() ||
|
||||
(strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
|
||||
result.assign(word);
|
||||
std::string tword(word);
|
||||
/* we have a match so add suffix */
|
||||
result.replace(len - strip.size(), std::string::npos, appnd);
|
||||
tword.replace(len - strip.size(), std::string::npos, appnd);
|
||||
return mystrdup(tword.c_str());
|
||||
}
|
||||
return result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
inline char* SfxEntry::nextchar(char* p) {
|
||||
@ -615,6 +669,9 @@ struct hentry* SfxEntry::checkword(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
char** wlst,
|
||||
int maxSug,
|
||||
int* ns,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag,
|
||||
const FLAG badflag) {
|
||||
@ -685,6 +742,27 @@ struct hentry* SfxEntry::checkword(const char* word,
|
||||
return he;
|
||||
he = he->next_homonym; // check homonyms
|
||||
} while (he);
|
||||
|
||||
// obsolete stemming code (used only by the
|
||||
// experimental SuffixMgr:suggest_pos_stems)
|
||||
// store resulting root in wlst
|
||||
} else if (wlst && (*ns < maxSug)) {
|
||||
int cwrd = 1;
|
||||
for (int k = 0; k < *ns; k++)
|
||||
if (strcmp(tmpword, wlst[k]) == 0) {
|
||||
cwrd = 0;
|
||||
break;
|
||||
}
|
||||
if (cwrd) {
|
||||
wlst[*ns] = mystrdup(tmpword);
|
||||
if (wlst[*ns] == NULL) {
|
||||
for (int j = 0; j < *ns; j++)
|
||||
free(wlst[j]);
|
||||
*ns = -1;
|
||||
return NULL;
|
||||
}
|
||||
(*ns)++;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -697,6 +775,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag) {
|
||||
struct hentry* he; // hash entry pointer
|
||||
PfxEntry* ep = ppfx;
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
@ -734,18 +813,17 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
|
||||
// if all conditions are met then recall suffix_check
|
||||
|
||||
if (test_condition(end, beg)) {
|
||||
struct hentry* he; // hash entry pointer
|
||||
if (ppfx) {
|
||||
// handle conditional suffix
|
||||
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
|
||||
(FLAG)aflag, needflag);
|
||||
else
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, NULL, 0,
|
||||
NULL, (FLAG)aflag, needflag);
|
||||
} else {
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
|
||||
(FLAG)aflag, needflag, IN_CPD_NOT);
|
||||
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
|
||||
(FLAG)aflag, needflag);
|
||||
}
|
||||
if (he)
|
||||
return he;
|
||||
@ -755,20 +833,23 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
|
||||
}
|
||||
|
||||
// see if two-level suffix is present in the word
|
||||
std::string SfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag) {
|
||||
char* SfxEntry::check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag) {
|
||||
PfxEntry* ep = ppfx;
|
||||
char* st;
|
||||
|
||||
std::string result;
|
||||
char result[MAXLNLEN];
|
||||
|
||||
*result = '\0';
|
||||
|
||||
// if this suffix is being cross checked with a prefix
|
||||
// but it does not support cross products skip it
|
||||
|
||||
if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
|
||||
return result;
|
||||
return NULL;
|
||||
|
||||
// upon entry suffix is 0 length or already matches the end of the word.
|
||||
// So if the remaining root word has positive length
|
||||
@ -802,34 +883,40 @@ std::string SfxEntry::check_twosfx_morph(const char* word,
|
||||
if (ppfx) {
|
||||
// handle conditional suffix
|
||||
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
|
||||
needflag);
|
||||
if (!st.empty()) {
|
||||
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
|
||||
needflag);
|
||||
if (st) {
|
||||
if (ppfx->getMorph()) {
|
||||
result.append(ppfx->getMorph());
|
||||
result.append(" ");
|
||||
mystrcat(result, ppfx->getMorph(), MAXLNLEN);
|
||||
mystrcat(result, " ", MAXLNLEN);
|
||||
}
|
||||
result.append(st);
|
||||
mystrcat(result, st, MAXLNLEN);
|
||||
free(st);
|
||||
mychomp(result);
|
||||
}
|
||||
} else {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
|
||||
needflag);
|
||||
if (!st.empty()) {
|
||||
result.append(st);
|
||||
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
|
||||
needflag);
|
||||
if (st) {
|
||||
mystrcat(result, st, MAXLNLEN);
|
||||
free(st);
|
||||
mychomp(result);
|
||||
}
|
||||
}
|
||||
} else {
|
||||
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
|
||||
if (!st.empty()) {
|
||||
result.append(st);
|
||||
st =
|
||||
pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
|
||||
if (st) {
|
||||
mystrcat(result, st, MAXLNLEN);
|
||||
free(st);
|
||||
mychomp(result);
|
||||
}
|
||||
}
|
||||
if (*result)
|
||||
return mystrdup(result);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// get next homonym with same affix
|
||||
@ -861,11 +948,6 @@ struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Refreshes rappnd from appnd: copies the append string, then hands the copy
// to reverseword() (presumably reversing it in place - confirm against the
// helper's definition).
void SfxEntry::initReverseWord() {
|
||||
rappnd = appnd;
|
||||
reverseword(rappnd);
|
||||
}
|
||||
|
||||
#if 0
|
||||
|
||||
Appendix: Understanding Affix Code
|
||||
|
@ -71,8 +71,10 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef AFFIX_HXX_
|
||||
#define AFFIX_HXX_
|
||||
#ifndef _AFFIX_HXX_
|
||||
#define _AFFIX_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
@ -80,7 +82,7 @@
|
||||
|
||||
/* A Prefix Entry */
|
||||
|
||||
class PfxEntry : public AffEntry {
|
||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry {
|
||||
private:
|
||||
PfxEntry(const PfxEntry&);
|
||||
PfxEntry& operator=(const PfxEntry&);
|
||||
@ -94,9 +96,10 @@ class PfxEntry : public AffEntry {
|
||||
PfxEntry* flgnxt;
|
||||
|
||||
public:
|
||||
explicit PfxEntry(AffixMgr* pmgr);
|
||||
PfxEntry(AffixMgr* pmgr, affentry* dp);
|
||||
~PfxEntry();
|
||||
|
||||
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
@ -107,19 +110,19 @@ class PfxEntry : public AffEntry {
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
FLAG getFlag() { return aflag; }
|
||||
const char* getKey() { return appnd.c_str(); }
|
||||
std::string add(const char* word, size_t len);
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char* getKey() { return appnd.c_str(); }
|
||||
char* add(const char* word, size_t len);
|
||||
|
||||
inline short getKeyLen() { return appnd.size(); }
|
||||
|
||||
@ -144,7 +147,7 @@ class PfxEntry : public AffEntry {
|
||||
|
||||
/* A Suffix Entry */
|
||||
|
||||
class SfxEntry : public AffEntry {
|
||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {
|
||||
private:
|
||||
SfxEntry(const SfxEntry&);
|
||||
SfxEntry& operator=(const SfxEntry&);
|
||||
@ -163,16 +166,20 @@ class SfxEntry : public AffEntry {
|
||||
SfxEntry* eq_morph;
|
||||
|
||||
public:
|
||||
explicit SfxEntry(AffixMgr* pmgr);
|
||||
SfxEntry(AffixMgr* pmgr, affentry* dp);
|
||||
~SfxEntry();
|
||||
|
||||
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag,
|
||||
const FLAG badflag);
|
||||
char** wlst,
|
||||
int maxSug,
|
||||
int* ns,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
const FLAG badflag = FLAG_NULL);
|
||||
|
||||
struct hentry* check_twosfx(const char* word,
|
||||
int len,
|
||||
@ -180,11 +187,11 @@ class SfxEntry : public AffEntry {
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* get_next_homonym(struct hentry* he);
|
||||
struct hentry* get_next_homonym(struct hentry* word,
|
||||
int optflags,
|
||||
@ -192,9 +199,9 @@ class SfxEntry : public AffEntry {
|
||||
const FLAG cclass,
|
||||
const FLAG needflag);
|
||||
|
||||
FLAG getFlag() { return aflag; }
|
||||
const char* getKey() { return rappnd.c_str(); }
|
||||
std::string add(const char* word, size_t len);
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char* getKey() { return rappnd.c_str(); }
|
||||
char* add(const char* word, size_t len);
|
||||
|
||||
inline const char* getMorph() { return morphcode; }
|
||||
|
||||
@ -217,7 +224,6 @@ class SfxEntry : public AffEntry {
|
||||
inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
|
||||
void initReverseWord();
|
||||
|
||||
inline char* nextchar(char* p);
|
||||
inline int test_condition(const char* st, const char* begin);
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -71,13 +71,14 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef AFFIXMGR_HXX_
|
||||
#define AFFIXMGR_HXX_
|
||||
#ifndef _AFFIXMGR_HXX_
|
||||
#define _AFFIXMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
@ -92,16 +93,17 @@
|
||||
class PfxEntry;
|
||||
class SfxEntry;
|
||||
|
||||
class AffixMgr {
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr {
|
||||
PfxEntry* pStart[SETSIZE];
|
||||
SfxEntry* sStart[SETSIZE];
|
||||
PfxEntry* pFlag[SETSIZE];
|
||||
SfxEntry* sFlag[SETSIZE];
|
||||
const std::vector<HashMgr*>& alldic;
|
||||
const HashMgr* pHMgr;
|
||||
std::string keystring;
|
||||
std::string trystring;
|
||||
std::string encoding;
|
||||
HashMgr* pHMgr;
|
||||
HashMgr** alldic;
|
||||
int* maxdic;
|
||||
char* keystring;
|
||||
char* trystring;
|
||||
char* encoding;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
@ -123,19 +125,19 @@ class AffixMgr {
|
||||
FLAG nongramsuggest;
|
||||
FLAG needaffix;
|
||||
int cpdmin;
|
||||
bool parsedrep;
|
||||
std::vector<replentry> reptable;
|
||||
int numrep;
|
||||
replentry* reptable;
|
||||
RepList* iconvtable;
|
||||
RepList* oconvtable;
|
||||
bool parsedmaptable;
|
||||
std::vector<mapentry> maptable;
|
||||
bool parsedbreaktable;
|
||||
std::vector<std::string> breaktable;
|
||||
bool parsedcheckcpd;
|
||||
std::vector<patentry> checkcpdtable;
|
||||
int nummap;
|
||||
mapentry* maptable;
|
||||
int numbreak;
|
||||
char** breaktable;
|
||||
int numcheckcpd;
|
||||
patentry* checkcpdtable;
|
||||
int simplifiedcpd;
|
||||
bool parseddefcpd;
|
||||
std::vector<flagentry> defcpdtable;
|
||||
int numdefcpd;
|
||||
flagentry* defcpdtable;
|
||||
phonetable* phone;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
@ -145,9 +147,10 @@ class AffixMgr {
|
||||
int sugswithdots;
|
||||
int cpdwordmax;
|
||||
int cpdmaxsyllable;
|
||||
std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
|
||||
std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
|
||||
std::string cpdsyllablenum; // syllable count incrementing flag
|
||||
char* cpdvowels;
|
||||
w_char* cpdvowels_utf16;
|
||||
int cpdvowels_utf16_len;
|
||||
char* cpdsyllablenum;
|
||||
const char* pfxappnd; // BUG: not stateless
|
||||
const char* sfxappnd; // BUG: not stateless
|
||||
int sfxextra; // BUG: not stateless
|
||||
@ -156,12 +159,12 @@ class AffixMgr {
|
||||
SfxEntry* sfx; // BUG: not stateless
|
||||
PfxEntry* pfx; // BUG: not stateless
|
||||
int checknum;
|
||||
std::string wordchars; // letters + spec. word characters
|
||||
char* wordchars;
|
||||
std::vector<w_char> wordchars_utf16;
|
||||
std::string ignorechars; // letters + spec. word characters
|
||||
char* ignorechars;
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
std::string version; // affix and dictionary file version string
|
||||
std::string lang; // language
|
||||
char* version;
|
||||
char* lang;
|
||||
int langnum;
|
||||
FLAG lemma_present;
|
||||
FLAG circumfix;
|
||||
@ -179,7 +182,7 @@ class AffixMgr {
|
||||
// affix)
|
||||
|
||||
public:
|
||||
AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
|
||||
AffixMgr(const char* affpath, HashMgr** ptr, int* md, const char* key = NULL);
|
||||
~AffixMgr();
|
||||
struct hentry* affix_check(const char* word,
|
||||
int len,
|
||||
@ -199,6 +202,9 @@ class AffixMgr {
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
char** wlst,
|
||||
int maxSug,
|
||||
int* ns,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
@ -208,39 +214,39 @@ class AffixMgr {
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string affix_check_morph(const char* word,
|
||||
int len,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
std::string prefix_check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
std::string suffix_check_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
char* affix_check_morph(const char* word,
|
||||
int len,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
char* prefix_check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* suffix_check_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
|
||||
std::string prefix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
std::string suffix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* prefix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* suffix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
std::string morphgen(const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* morph,
|
||||
const char* targetmorph,
|
||||
int level);
|
||||
char* morphgen(const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* morph,
|
||||
const char* targetmorph,
|
||||
int level);
|
||||
|
||||
int expand_rootword(struct guessword* wlst,
|
||||
int maxn,
|
||||
@ -267,7 +273,8 @@ class AffixMgr {
|
||||
int cpdcase_check(const char* word, int len);
|
||||
inline int candidate_check(const char* word, int len);
|
||||
void setcminmax(int* cmin, int* cmax, const char* word, int len);
|
||||
struct hentry* compound_check(const std::string& word,
|
||||
struct hentry* compound_check(const char* word,
|
||||
int len,
|
||||
short wordnum,
|
||||
short numsyllable,
|
||||
short maxwordnum,
|
||||
@ -287,37 +294,47 @@ class AffixMgr {
|
||||
hentry** words,
|
||||
hentry** rwords,
|
||||
char hu_mov_rule,
|
||||
std::string& result,
|
||||
const std::string* partresult);
|
||||
char** result,
|
||||
char* partresult);
|
||||
|
||||
std::vector<std::string> get_suffix_words(short unsigned* suff,
|
||||
int get_suffix_words(short unsigned* suff,
|
||||
int len,
|
||||
const char* root_word);
|
||||
const char* root_word,
|
||||
char** slst);
|
||||
|
||||
struct hentry* lookup(const char* word);
|
||||
const std::vector<replentry>& get_reptable() const;
|
||||
int get_numrep() const;
|
||||
struct replentry* get_reptable() const;
|
||||
RepList* get_iconvtable() const;
|
||||
RepList* get_oconvtable() const;
|
||||
struct phonetable* get_phonetable() const;
|
||||
const std::vector<mapentry>& get_maptable() const;
|
||||
const std::vector<std::string>& get_breaktable() const;
|
||||
const std::string& get_encoding();
|
||||
int get_nummap() const;
|
||||
struct mapentry* get_maptable() const;
|
||||
int get_numbreak() const;
|
||||
char** get_breaktable() const;
|
||||
char* get_encoding();
|
||||
int get_langnum() const;
|
||||
char* get_key_string();
|
||||
char* get_try_string() const;
|
||||
const std::string& get_wordchars() const;
|
||||
const char* get_wordchars() const;
|
||||
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||
const char* get_ignore() const;
|
||||
char* get_ignore() const;
|
||||
const std::vector<w_char>& get_ignore_utf16() const;
|
||||
int get_compound() const;
|
||||
FLAG get_compoundflag() const;
|
||||
FLAG get_compoundbegin() const;
|
||||
FLAG get_forbiddenword() const;
|
||||
FLAG get_nosuggest() const;
|
||||
FLAG get_nongramsuggest() const;
|
||||
FLAG get_needaffix() const;
|
||||
FLAG get_onlyincompound() const;
|
||||
FLAG get_compoundroot() const;
|
||||
FLAG get_lemma_present() const;
|
||||
int get_checknum() const;
|
||||
const char* get_prefix() const;
|
||||
const char* get_suffix() const;
|
||||
const char* get_derived() const;
|
||||
const std::string& get_version() const;
|
||||
const char* get_version() const;
|
||||
int have_contclass() const;
|
||||
int get_utf8() const;
|
||||
int get_complexprefixes() const;
|
||||
@ -338,25 +355,26 @@ class AffixMgr {
|
||||
|
||||
private:
|
||||
int parse_file(const char* affpath, const char* key);
|
||||
bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
|
||||
bool parse_num(const std::string& line, int* out, FileMgr* af);
|
||||
bool parse_cpdsyllable(const std::string& line, FileMgr* af);
|
||||
bool parse_reptable(const std::string& line, FileMgr* af);
|
||||
bool parse_convtable(const std::string& line,
|
||||
int parse_flag(char* line, unsigned short* out, FileMgr* af);
|
||||
int parse_num(char* line, int* out, FileMgr* af);
|
||||
int parse_cpdsyllable(char* line, FileMgr* af);
|
||||
int parse_reptable(char* line, FileMgr* af);
|
||||
int parse_convtable(char* line,
|
||||
FileMgr* af,
|
||||
RepList** rl,
|
||||
const std::string& keyword);
|
||||
bool parse_phonetable(const std::string& line, FileMgr* af);
|
||||
bool parse_maptable(const std::string& line, FileMgr* af);
|
||||
bool parse_breaktable(const std::string& line, FileMgr* af);
|
||||
bool parse_checkcpdtable(const std::string& line, FileMgr* af);
|
||||
bool parse_defcpdtable(const std::string& line, FileMgr* af);
|
||||
bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
|
||||
const char* keyword);
|
||||
int parse_phonetable(char* line, FileMgr* af);
|
||||
int parse_maptable(char* line, FileMgr* af);
|
||||
int parse_breaktable(char* line, FileMgr* af);
|
||||
int parse_checkcpdtable(char* line, FileMgr* af);
|
||||
int parse_defcpdtable(char* line, FileMgr* af);
|
||||
int parse_affix(char* line, const char at, FileMgr* af, char* dupflags);
|
||||
|
||||
void reverse_condition(std::string&);
|
||||
void debugflag(char* result, unsigned short flag);
|
||||
std::string& debugflag(std::string& result, unsigned short flag);
|
||||
int condlen(const char*);
|
||||
int encodeit(AffEntry& entry, const char* cs);
|
||||
int encodeit(affentry& entry, const char* cs);
|
||||
int build_pfxtree(PfxEntry* pfxptr);
|
||||
int build_sfxtree(SfxEntry* sfxptr);
|
||||
int process_pfx_order();
|
||||
|
@ -38,8 +38,8 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef ATYPES_HXX_
|
||||
#define ATYPES_HXX_
|
||||
#ifndef _ATYPES_HXX_
|
||||
#define _ATYPES_HXX_
|
||||
|
||||
#ifndef HUNSPELL_WARNING
|
||||
#include <stdio.h>
|
||||
@ -63,7 +63,7 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
|
||||
#define SETSIZE 256
|
||||
#define CONTSIZE 65536
|
||||
|
||||
// AffEntry options
|
||||
// affentry options
|
||||
#define aeXPRODUCT (1 << 0)
|
||||
#define aeUTF8 (1 << 1)
|
||||
#define aeALIASF (1 << 2)
|
||||
@ -85,6 +85,8 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
|
||||
#define SPELL_ORIGCAP (1 << 5)
|
||||
#define SPELL_WARN (1 << 6)
|
||||
|
||||
#define MAXLNLEN 8192
|
||||
|
||||
#define MINCPDLEN 3
|
||||
#define MAXCOMPOUND 10
|
||||
#define MAXCONDLEN 20
|
||||
@ -98,25 +100,46 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
|
||||
|
||||
#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))
|
||||
|
||||
struct affentry {
|
||||
std::string strip;
|
||||
std::string appnd;
|
||||
char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
unsigned short* contclass;
|
||||
short contclasslen;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char* conds2;
|
||||
} l;
|
||||
} c;
|
||||
char* morphcode;
|
||||
};
|
||||
|
||||
// Element type of the `wlst` array that expand_rootword() takes together
// with its capacity `maxn`.
struct guessword {
|
||||
// presumably the generated candidate word -- TODO confirm against
// expand_rootword()
char* word;
|
||||
// NOTE(review): meaning of `allow` is not visible in this file section;
// confirm against the code that fills and reads the list
bool allow;
|
||||
// presumably the original form the candidate was derived from -- TODO confirm
char* orig;
|
||||
};
|
||||
|
||||
typedef std::vector<std::string> mapentry;
|
||||
typedef std::vector<FLAG> flagentry;
|
||||
// Element type of the table returned by get_maptable(); the number of
// table entries is reported separately by get_nummap().
struct mapentry {
|
||||
// array of C strings; presumably one related-character group -- TODO confirm
// against parse_maptable()
char** set;
|
||||
// presumably the number of strings stored in `set` -- TODO confirm
int len;
|
||||
};
|
||||
|
||||
// A counted array of FLAG values.
// NOTE(review): the users of this struct are not visible in this section;
// confirm who allocates and frees `def`.
struct flagentry {
|
||||
// pointer to the FLAG values
FLAG* def;
|
||||
// presumably the number of FLAG values stored in `def` -- TODO confirm
int len;
|
||||
};
|
||||
|
||||
struct patentry {
|
||||
std::string pattern;
|
||||
std::string pattern2;
|
||||
std::string pattern3;
|
||||
char* pattern;
|
||||
char* pattern2;
|
||||
char* pattern3;
|
||||
FLAG cond;
|
||||
FLAG cond2;
|
||||
patentry()
|
||||
: cond(FLAG_NULL)
|
||||
, cond2(FLAG_NULL) {
|
||||
}
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -38,17 +38,18 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef BASEAFF_HXX_
|
||||
#define BASEAFF_HXX_
|
||||
#ifndef _BASEAFF_HXX_
|
||||
#define _BASEAFF_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
#include <string>
|
||||
|
||||
class AffEntry {
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry {
|
||||
private:
|
||||
AffEntry(const AffEntry&);
|
||||
AffEntry& operator=(const AffEntry&);
|
||||
|
||||
public:
|
||||
protected:
|
||||
AffEntry()
|
||||
: numconds(0),
|
||||
opts(0),
|
||||
@ -56,7 +57,6 @@ class AffEntry {
|
||||
morphcode(0),
|
||||
contclass(NULL),
|
||||
contclasslen(0) {}
|
||||
virtual ~AffEntry();
|
||||
std::string appnd;
|
||||
std::string strip;
|
||||
unsigned char numconds;
|
||||
|
@ -76,7 +76,6 @@
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
#include <sstream>
|
||||
|
||||
#include "csutil.hxx"
|
||||
#include "atypes.hxx"
|
||||
@ -123,24 +122,26 @@ static struct unicode_info2* utf_tbl = NULL;
|
||||
static int utf_tbl_count =
|
||||
0; // utf_tbl can be used by multiple Hunspell instances
|
||||
|
||||
void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode)
|
||||
{
|
||||
#if defined(_WIN32) && defined(_MSC_VER)
|
||||
FILE* myfopen(const char* path, const char* mode) {
|
||||
#ifdef _WIN32
|
||||
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
|
||||
if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
|
||||
int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
|
||||
wchar_t* buff = new wchar_t[len];
|
||||
wchar_t* buff2 = new wchar_t[len];
|
||||
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
|
||||
if (_wfullpath(buff2, buff, len) != NULL) {
|
||||
stream.open(buff2, mode);
|
||||
wchar_t* buff = (wchar_t*)malloc(len * sizeof(wchar_t));
|
||||
wchar_t* buff2 = (wchar_t*)malloc(len * sizeof(wchar_t));
|
||||
FILE* f = NULL;
|
||||
if (buff && buff2) {
|
||||
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
|
||||
if (_wfullpath(buff2, buff, len) != NULL) {
|
||||
f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
|
||||
}
|
||||
free(buff);
|
||||
free(buff2);
|
||||
}
|
||||
delete [] buff;
|
||||
delete [] buff2;
|
||||
return f;
|
||||
}
|
||||
else
|
||||
#endif
|
||||
stream.open(path, mode);
|
||||
return fopen(path, mode);
|
||||
}
|
||||
|
||||
std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
|
||||
@ -217,7 +218,7 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {
|
||||
case 0xd0: { // 2-byte UTF-8 codes
|
||||
if ((*(u8 + 1) & 0xc0) == 0x80) {
|
||||
u2.h = (*u8 & 0x1f) >> 2;
|
||||
u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);
|
||||
u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f);
|
||||
++u8;
|
||||
} else {
|
||||
HUNSPELL_WARNING(stderr,
|
||||
@ -274,35 +275,34 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {
|
||||
return dest.size();
|
||||
}
|
||||
|
||||
namespace {
|
||||
class is_any_of {
|
||||
public:
|
||||
explicit is_any_of(const std::string& in) : chars(in) {}
|
||||
// strip strings into token based on single char delimiter
|
||||
// acts like strsep() but only uses a delim char and not
|
||||
// a delim string
|
||||
// default delimiter: white space characters
|
||||
|
||||
bool operator()(char c) { return chars.find(c) != std::string::npos; }
|
||||
|
||||
private:
|
||||
std::string chars;
|
||||
};
|
||||
}
|
||||
|
||||
std::string::const_iterator mystrsep(const std::string &str,
|
||||
std::string::const_iterator& start) {
|
||||
std::string::const_iterator end = str.end();
|
||||
|
||||
is_any_of op(" \t");
|
||||
// don't use isspace() here, the string can be in some random charset
|
||||
// that's way different than the locale's
|
||||
std::string::const_iterator sp = start;
|
||||
while (sp != end && op(*sp))
|
||||
++sp;
|
||||
|
||||
std::string::const_iterator dp = sp;
|
||||
while (dp != end && !op(*dp))
|
||||
++dp;
|
||||
|
||||
start = dp;
|
||||
return sp;
|
||||
// Destructive tokenizer: acts like strsep() but takes a single delimiter
// character instead of a delimiter string.
//
// stringp  in/out cursor into a writable, NUL-terminated buffer
// delim    delimiter character; 0 selects the default delimiters
//          (space and tab)
//
// Returns the start of the next token, NUL-terminated in place. The token
// is empty when two delimiters are adjacent. Returns NULL when the cursor
// already points at the end of the buffer, or when no cursor/buffer was
// supplied. On return *stringp points just past the consumed delimiter, or
// at the terminating NUL when the token was the last one.
char* mystrsep(char** stringp, const char delim) {
  // A missing cursor or buffer means "no more tokens" rather than a crash.
  if (!stringp || !*stringp)
    return NULL;

  char* mp = *stringp;
  if (*mp == '\0')
    return NULL;

  char* dp;
  if (delim) {
    dp = strchr(mp, delim);
  } else {
    // don't use isspace() here, the string can be in some random charset
    // that's way different than the locale's
    for (dp = mp; *dp && *dp != ' ' && *dp != '\t'; ++dp)
      ;
    if (!*dp)
      dp = NULL;
  }

  if (dp) {
    // Cut the token at the delimiter and continue right after it.
    *stringp = dp + 1;
    *dp = '\0';
  } else {
    // Last token: park the cursor on the terminating NUL.
    *stringp = mp + strlen(mp);
  }
  return mp;
}
|
||||
|
||||
// replaces strdup with ansi version
|
||||
@ -320,98 +320,142 @@ char* mystrdup(const char* s) {
|
||||
return d;
|
||||
}
|
||||
|
||||
// strcat for limited length destination string
|
||||
// strcat for a destination buffer of limited size.
//
// dest  NUL-terminated destination string (may be NULL)
// st    NUL-terminated string to append (may be NULL)
// max   total size of the dest buffer in bytes, including the terminator
//
// Appends st only when the whole result, terminator included, fits into
// max bytes; otherwise dest is left unchanged. Always returns dest.
char* mystrcat(char* dest, const char* st, int max) {
  if (dest == NULL || st == NULL || max <= 0)
    return dest;

  // Do the capacity check in size_t: narrowing strlen() results to int and
  // adding them could truncate or overflow for very long inputs.
  const size_t capacity = static_cast<size_t>(max);
  const size_t used = strlen(dest);
  const size_t extra = strlen(st);
  if (used >= capacity || extra > capacity - used - 1)
    return dest;

  // Copy the terminating NUL along with the text.
  memcpy(dest + used, st, extra + 1);
  return dest;
}
|
||||
|
||||
// remove cross-platform text line end characters
|
||||
void mychomp(std::string& s) {
|
||||
size_t k = s.size();
|
||||
size_t newsize = k;
|
||||
if ((k > 0) && ((s[k - 1] == '\r') || (s[k - 1] == '\n')))
|
||||
--newsize;
|
||||
if ((k > 1) && (s[k - 2] == '\r'))
|
||||
--newsize;
|
||||
s.resize(newsize);
|
||||
void mychomp(char* s) {
|
||||
size_t k = strlen(s);
|
||||
if ((k > 0) && ((*(s + k - 1) == '\r') || (*(s + k - 1) == '\n')))
|
||||
*(s + k - 1) = '\0';
|
||||
if ((k > 1) && (*(s + k - 2) == '\r'))
|
||||
*(s + k - 2) = '\0';
|
||||
}
|
||||
|
||||
// break text to lines
|
||||
std::vector<std::string> line_tok(const std::string& text, char breakchar) {
|
||||
std::vector<std::string> ret;
|
||||
if (text.empty()) {
|
||||
return ret;
|
||||
// return number of lines
|
||||
int line_tok(const char* text, char*** lines, char breakchar) {
|
||||
int linenum = 0;
|
||||
if (!text) {
|
||||
return linenum;
|
||||
}
|
||||
char* dup = mystrdup(text);
|
||||
char* p = strchr(dup, breakchar);
|
||||
while (p) {
|
||||
linenum++;
|
||||
*p = '\0';
|
||||
p++;
|
||||
p = strchr(p, breakchar);
|
||||
}
|
||||
linenum++;
|
||||
*lines = (char**)malloc(linenum * sizeof(char*));
|
||||
if (!(*lines)) {
|
||||
free(dup);
|
||||
return 0;
|
||||
}
|
||||
|
||||
std::stringstream ss(text);
|
||||
std::string tok;
|
||||
while(std::getline(ss, tok, breakchar)) {
|
||||
if (!tok.empty()) {
|
||||
ret.push_back(tok);
|
||||
p = dup;
|
||||
int l = 0;
|
||||
for (int i = 0; i < linenum; i++) {
|
||||
if (*p != '\0') {
|
||||
(*lines)[l] = mystrdup(p);
|
||||
if (!(*lines)[l]) {
|
||||
for (i = 0; i < l; i++)
|
||||
free((*lines)[i]);
|
||||
free(dup);
|
||||
return 0;
|
||||
}
|
||||
l++;
|
||||
}
|
||||
p += strlen(p) + 1;
|
||||
}
|
||||
|
||||
return ret;
|
||||
free(dup);
|
||||
if (!l) {
|
||||
free(*lines);
|
||||
*lines = NULL;
|
||||
}
|
||||
return l;
|
||||
}
|
||||
|
||||
// uniq line in place
|
||||
void line_uniq(std::string& text, char breakchar)
|
||||
{
|
||||
std::vector<std::string> lines = line_tok(text, breakchar);
|
||||
text.clear();
|
||||
if (lines.empty()) {
|
||||
return;
|
||||
}
|
||||
text = lines[0];
|
||||
for (size_t i = 1; i < lines.size(); ++i) {
|
||||
bool dup = false;
|
||||
for (size_t j = 0; j < i; ++j) {
|
||||
if (lines[i] == lines[j]) {
|
||||
dup = true;
|
||||
char* line_uniq(char* text, char breakchar) {
|
||||
char** lines;
|
||||
int linenum = line_tok(text, &lines, breakchar);
|
||||
int i;
|
||||
strcpy(text, lines[0]);
|
||||
for (i = 1; i < linenum; i++) {
|
||||
int dup = 0;
|
||||
for (int j = 0; j < i; j++) {
|
||||
if (strcmp(lines[i], lines[j]) == 0) {
|
||||
dup = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!dup) {
|
||||
if (!text.empty())
|
||||
text.push_back(breakchar);
|
||||
text.append(lines[i]);
|
||||
if ((i > 1) || (*(lines[0]) != '\0')) {
|
||||
sprintf(text + strlen(text), "%c", breakchar);
|
||||
}
|
||||
strcat(text, lines[i]);
|
||||
}
|
||||
}
|
||||
for (i = 0; i < linenum; i++) {
|
||||
free(lines[i]);
|
||||
}
|
||||
free(lines);
|
||||
return text;
|
||||
}
|
||||
|
||||
// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) "
|
||||
void line_uniq_app(std::string& text, char breakchar) {
|
||||
if (text.find(breakchar) == std::string::npos) {
|
||||
return;
|
||||
char* line_uniq_app(char** text, char breakchar) {
|
||||
if (!strchr(*text, breakchar)) {
|
||||
return *text;
|
||||
}
|
||||
|
||||
std::vector<std::string> lines = line_tok(text, breakchar);
|
||||
text.clear();
|
||||
if (lines.empty()) {
|
||||
return;
|
||||
}
|
||||
text = lines[0];
|
||||
for (size_t i = 1; i < lines.size(); ++i) {
|
||||
bool dup = false;
|
||||
for (size_t j = 0; j < i; ++j) {
|
||||
if (lines[i] == lines[j]) {
|
||||
dup = true;
|
||||
char** lines;
|
||||
int i;
|
||||
int linenum = line_tok(*text, &lines, breakchar);
|
||||
int dup = 0;
|
||||
for (i = 0; i < linenum; i++) {
|
||||
for (int j = 0; j < (i - 1); j++) {
|
||||
if (strcmp(lines[i], lines[j]) == 0) {
|
||||
*(lines[i]) = '\0';
|
||||
dup++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!dup) {
|
||||
if (!text.empty())
|
||||
text.push_back(breakchar);
|
||||
text.append(lines[i]);
|
||||
}
|
||||
if ((linenum - dup) == 1) {
|
||||
strcpy(*text, lines[0]);
|
||||
freelist(&lines, linenum);
|
||||
return *text;
|
||||
}
|
||||
char* newtext = (char*)malloc(strlen(*text) + 2 * linenum + 3 + 1);
|
||||
if (newtext) {
|
||||
free(*text);
|
||||
*text = newtext;
|
||||
} else {
|
||||
freelist(&lines, linenum);
|
||||
return *text;
|
||||
}
|
||||
strcpy(*text, " ( ");
|
||||
for (i = 0; i < linenum; i++)
|
||||
if (*(lines[i])) {
|
||||
sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
|
||||
}
|
||||
}
|
||||
|
||||
if (lines.size() == 1) {
|
||||
text = lines[0];
|
||||
return;
|
||||
}
|
||||
|
||||
text.assign(" ( ");
|
||||
for (size_t i = 0; i < lines.size(); ++i) {
|
||||
text.append(lines[i]);
|
||||
text.append(" | ");
|
||||
}
|
||||
text[text.size() - 2] = ')'; // " ) "
|
||||
(*text)[strlen(*text) - 2] = ')'; // " ) "
|
||||
freelist(&lines, linenum);
|
||||
return *text;
|
||||
}
|
||||
|
||||
// append s to ends of every lines in text
|
||||
@ -425,6 +469,111 @@ std::string& strlinecat(std::string& str, const std::string& apd) {
|
||||
return str;
|
||||
}
|
||||
|
||||
// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
|
||||
// in the first line of the inputs
|
||||
// return 0, if inputs equal
|
||||
// return 1, if inputs may equal with a secondary suffix
|
||||
// otherwise return -1
|
||||
// Walks the suffix fields (MORPH_DERI_SFX, MORPH_INFL_SFX, MORPH_TERM_SFX)
// found in the first line of s and of t in parallel and compares their
// values.
//   s, t  morphological descriptions; only the part before the first '\n'
//         is considered. If either pointer is NULL the result is 1.
// Returns 0 when every compared field matched and both inputs ran out of
// fields together, -1 on a value mismatch while the current s-side tag was
// found by the DERI/INFL search, and 1 in every other case.
int morphcmp(const char* s, const char* t) {
|
||||
// se/te: set when the value being compared on the s/t side has reached
// a separator (space, newline, tab) or the end of the string
int se = 0;
|
||||
int te = 0;
|
||||
// sl/tl: first newline of each input (NULL if none); tags located after it
// are outside the first line
const char* sl;
|
||||
const char* tl;
|
||||
// olds/oldt: start position of the most recent tag search; reset to NULL
// when the search fell back to MORPH_TERM_SFX
const char* olds;
|
||||
const char* oldt;
|
||||
if (!s || !t)
|
||||
return 1;
|
||||
// Locate the first suffix tag in s: try DERI, then INFL, then TERM, treating
// a hit beyond the first line like a miss.
olds = s;
|
||||
sl = strchr(s, '\n');
|
||||
s = strstr(s, MORPH_DERI_SFX);
|
||||
if (!s || (sl && sl < s))
|
||||
s = strstr(olds, MORPH_INFL_SFX);
|
||||
if (!s || (sl && sl < s)) {
|
||||
s = strstr(olds, MORPH_TERM_SFX);
|
||||
olds = NULL;
|
||||
}
|
||||
// Same lookup for t.
oldt = t;
|
||||
tl = strchr(t, '\n');
|
||||
t = strstr(t, MORPH_DERI_SFX);
|
||||
if (!t || (tl && tl < t))
|
||||
t = strstr(oldt, MORPH_INFL_SFX);
|
||||
if (!t || (tl && tl < t)) {
|
||||
t = strstr(oldt, MORPH_TERM_SFX);
|
||||
oldt = NULL;
|
||||
}
|
||||
// Continue while both sides still have a tag inside their first line.
while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
|
||||
// Skip the tag itself so that s and t point at the field values.
s += MORPH_TAG_LEN;
|
||||
t += MORPH_TAG_LEN;
|
||||
se = 0;
|
||||
te = 0;
|
||||
// Advance over the common prefix of both values until either side
// reaches a separator.
// NOTE(review): if both values are empty at the very end of their
// strings (*s == *t == '\0'), the body still increments s and t and the
// switches read one byte past the terminator -- confirm inputs never end
// directly after a tag.
while ((*s == *t) && !se && !te) {
|
||||
s++;
|
||||
t++;
|
||||
switch (*s) {
|
||||
case ' ':
|
||||
case '\n':
|
||||
case '\t':
|
||||
case '\0':
|
||||
se = 1;
|
||||
}
|
||||
switch (*t) {
|
||||
case ' ':
|
||||
case '\n':
|
||||
case '\t':
|
||||
case '\0':
|
||||
te = 1;
|
||||
}
|
||||
}
|
||||
// The values differ unless both ended at the same time.
if (!se || !te) {
|
||||
// not terminal suffix difference
|
||||
// olds is only NULL when the s-side tag came from the MORPH_TERM_SFX
// fallback; a mismatch on any other tag is reported as -1.
if (olds)
|
||||
return -1;
|
||||
return 1;
|
||||
}
|
||||
// Look for the next tag on each side, starting at the end of the value
// just compared, with the same DERI -> INFL -> TERM order as above.
olds = s;
|
||||
s = strstr(s, MORPH_DERI_SFX);
|
||||
if (!s || (sl && sl < s))
|
||||
s = strstr(olds, MORPH_INFL_SFX);
|
||||
if (!s || (sl && sl < s)) {
|
||||
s = strstr(olds, MORPH_TERM_SFX);
|
||||
olds = NULL;
|
||||
}
|
||||
oldt = t;
|
||||
t = strstr(t, MORPH_DERI_SFX);
|
||||
if (!t || (tl && tl < t))
|
||||
t = strstr(oldt, MORPH_INFL_SFX);
|
||||
if (!t || (tl && tl < t)) {
|
||||
t = strstr(oldt, MORPH_TERM_SFX);
|
||||
oldt = NULL;
|
||||
}
|
||||
}
|
||||
// Equal only if neither side has a tag left and the last compared values
// ended together.
if (!s && !t && se && te)
|
||||
return 0;
|
||||
return 1;
|
||||
}
|
||||
|
||||
int get_sfxcount(const char* morph) {
|
||||
if (!morph || !*morph)
|
||||
return 0;
|
||||
int n = 0;
|
||||
const char* old = morph;
|
||||
morph = strstr(morph, MORPH_DERI_SFX);
|
||||
if (!morph)
|
||||
morph = strstr(old, MORPH_INFL_SFX);
|
||||
if (!morph)
|
||||
morph = strstr(old, MORPH_TERM_SFX);
|
||||
while (morph) {
|
||||
n++;
|
||||
old = morph;
|
||||
morph = strstr(morph + 1, MORPH_DERI_SFX);
|
||||
if (!morph)
|
||||
morph = strstr(old + 1, MORPH_INFL_SFX);
|
||||
if (!morph)
|
||||
morph = strstr(old + 1, MORPH_TERM_SFX);
|
||||
}
|
||||
return n;
|
||||
}
|
||||
|
||||
int fieldlen(const char* r) {
|
||||
int n = 0;
|
||||
while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') {
|
||||
@ -466,6 +615,33 @@ std::string& mystrrep(std::string& str,
|
||||
return str;
|
||||
}
|
||||
|
||||
// Replaces every occurrence of pat in word with rep, in place.
//
// word  writable, NUL-terminated buffer; when rep is longer than pat the
//       caller must provide enough room for the grown result
// pat   text to search for; an empty pattern leaves word unchanged
// rep   replacement text
//
// Returns word. Inserted text is never scanned again, so a replacement
// that itself contains pat (e.g. "a" -> "ba") terminates.
char* mystrrep(char* word, const char* pat, const char* rep) {
  if (!word || !pat || !rep)
    return word;

  const size_t patlen = strlen(pat);
  // strstr() finds an empty pattern at every position, so replacing it
  // would loop forever.
  if (patlen == 0)
    return word;
  const size_t replen = strlen(rep);

  char* pos = strstr(word, pat);
  while (pos) {
    if (replen != patlen) {
      // Move the rest of the string (with its terminating NUL) to where it
      // belongs after the replacement; memmove handles the overlap in both
      // the shrinking and the growing case.
      char* tail = pos + patlen;
      memmove(pos + replen, tail, strlen(tail) + 1);
    }
    memcpy(pos, rep, replen);
    // Resume after the inserted text instead of rescanning from the start.
    pos = strstr(pos + replen, pat);
  }
  return word;
}
|
||||
|
||||
// reverse word
|
||||
size_t reverseword(std::string& word) {
|
||||
std::reverse(word.begin(), word.end());
|
||||
@ -481,19 +657,35 @@ size_t reverseword_utf(std::string& word) {
|
||||
return w.size();
|
||||
}
|
||||
|
||||
void uniqlist(std::vector<std::string>& list) {
|
||||
if (list.size() < 2)
|
||||
return;
|
||||
|
||||
std::vector<std::string> ret;
|
||||
ret.push_back(list[0]);
|
||||
|
||||
for (size_t i = 1; i < list.size(); ++i) {
|
||||
if (std::find(ret.begin(), ret.end(), list[i]) == ret.end())
|
||||
ret.push_back(list[i]);
|
||||
// Removes duplicate strings from a list of malloc()ed C strings, keeping
// the first occurrence of each and preserving order.
//
// list  array of n string pointers; entries may be NULL
// n     number of slots in list
//
// Duplicates are free()d. The surviving strings are packed into
// list[0..m-1], and every slot from m to n-1 is set to NULL, so the array
// can still be released with the original n without freeing any string
// twice. Returns m, the number of slots in use. Slot 0 is always counted,
// and lists with fewer than two entries are returned untouched.
int uniqlist(char** list, int n) {
  if (n < 2)
    return n;

  // Pass 1: free every string that repeats an earlier one, leaving a NULL
  // hole in its slot.
  for (int i = 1; i < n; i++) {
    for (int j = 0; j < i; j++) {
      if (list[j] && list[i] && (strcmp(list[j], list[i]) == 0)) {
        free(list[i]);
        list[i] = NULL;
        break;
      }
    }
  }

  // Pass 2: close the holes. Clearing the vacated slot avoids leaving a
  // second pointer to the same string behind in the tail of the array.
  int m = 1;
  for (int i = 1; i < n; i++) {
    if (list[i]) {
      if (m != i) {
        list[m] = list[i];
        list[i] = NULL;
      }
      m++;
    }
  }
  return m;
}
|
||||
|
||||
list.swap(ret);
|
||||
// Releases a malloc()ed array of malloc()ed strings.
//
// list  address of the array pointer; may be NULL, and *list may be NULL,
//       in which case nothing happens
// n     number of slots in the array
//
// Every slot and then the array itself is passed to free(), and *list is
// reset to NULL.
void freelist(char*** list, int n) {
  if (!list || !*list)
    return;

  char** items = *list;
  for (int idx = 0; idx < n; ++idx)
    free(items[idx]);
  free(items);
  *list = NULL;
}
|
||||
|
||||
namespace {
|
||||
@ -2265,9 +2457,9 @@ static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
|
||||
*pBuf = '\0';
|
||||
}
|
||||
|
||||
struct cs_info* get_current_cs(const std::string& es) {
|
||||
char* normalized_encoding = new char[es.size() + 1];
|
||||
toAsciiLowerAndRemoveNonAlphanumeric(es.c_str(), normalized_encoding);
|
||||
struct cs_info* get_current_cs(const char* es) {
|
||||
char* normalized_encoding = new char[strlen(es) + 1];
|
||||
toAsciiLowerAndRemoveNonAlphanumeric(es, normalized_encoding);
|
||||
|
||||
struct cs_info* ccs = NULL;
|
||||
int n = sizeof(encds) / sizeof(encds[0]);
|
||||
@ -2282,7 +2474,7 @@ struct cs_info* get_current_cs(const std::string& es) {
|
||||
|
||||
if (!ccs) {
|
||||
HUNSPELL_WARNING(stderr,
|
||||
"error: unknown encoding %s: using %s as fallback\n", es.c_str(),
|
||||
"error: unknown encoding %s: using %s as fallback\n", es,
|
||||
encds[0].enc_name);
|
||||
ccs = encds[0].cs_table;
|
||||
}
|
||||
@ -2293,7 +2485,7 @@ struct cs_info* get_current_cs(const std::string& es) {
|
||||
// XXX This function was rewritten for mozilla. Instead of storing the
|
||||
// conversion tables static in this file, create them when needed
|
||||
// with the help of the mozilla backend.
|
||||
struct cs_info* get_current_cs(const std::string& es) {
|
||||
struct cs_info* get_current_cs(const char* es) {
|
||||
struct cs_info* ccs = new cs_info[256];
|
||||
// Initialize the array with dummy data so that we wouldn't need
|
||||
// to return null in case of failures.
|
||||
@ -2308,7 +2500,7 @@ struct cs_info* get_current_cs(const std::string& es) {
|
||||
|
||||
nsresult rv;
|
||||
|
||||
nsAutoCString label(es.c_str());
|
||||
nsAutoCString label(es);
|
||||
nsAutoCString encoding;
|
||||
if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
|
||||
return ccs;
|
||||
@ -2373,18 +2565,21 @@ struct cs_info* get_current_cs(const std::string& es) {
|
||||
#endif
|
||||
|
||||
// primitive isalpha() replacement for tokenization
|
||||
std::string get_casechars(const char* enc) {
|
||||
char* get_casechars(const char* enc) {
|
||||
struct cs_info* csconv = get_current_cs(enc);
|
||||
std::string expw;
|
||||
for (int i = 0; i <= 255; ++i) {
|
||||
char expw[MAXLNLEN];
|
||||
char* p = expw;
|
||||
for (int i = 0; i <= 255; i++) {
|
||||
if (cupper(csconv, i) != clower(csconv, i)) {
|
||||
expw.push_back(static_cast<char>(i));
|
||||
*p = static_cast<char>(i);
|
||||
p++;
|
||||
}
|
||||
}
|
||||
*p = '\0';
|
||||
#ifdef MOZILLA_CLIENT
|
||||
delete[] csconv;
|
||||
#endif
|
||||
return expw;
|
||||
return mystrdup(expw);
|
||||
}
|
||||
|
||||
// language to encoding default map
|
||||
@ -2411,10 +2606,10 @@ static struct lang_map lang2enc[] =
|
||||
{"tr_TR", LANG_tr}, // for back-compatibility
|
||||
{"ru", LANG_ru}, {"uk", LANG_uk}};
|
||||
|
||||
int get_lang_num(const std::string& lang) {
|
||||
int get_lang_num(const char* lang) {
|
||||
int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
|
||||
for (int i = 0; i < n; i++) {
|
||||
if (strcmp(lang.c_str(), lang2enc[i].lang) == 0) {
|
||||
if (strcmp(lang, lang2enc[i].lang) == 0) {
|
||||
return lang2enc[i].num;
|
||||
}
|
||||
}
|
||||
@ -2423,21 +2618,26 @@ int get_lang_num(const std::string& lang) {
|
||||
|
||||
#ifndef OPENOFFICEORG
|
||||
#ifndef MOZILLA_CLIENT
|
||||
void initialize_utf_tbl() {
|
||||
int initialize_utf_tbl() {
|
||||
utf_tbl_count++;
|
||||
if (utf_tbl)
|
||||
return;
|
||||
utf_tbl = new unicode_info2[CONTSIZE];
|
||||
for (size_t j = 0; j < CONTSIZE; ++j) {
|
||||
utf_tbl[j].cletter = 0;
|
||||
utf_tbl[j].clower = (unsigned short)j;
|
||||
utf_tbl[j].cupper = (unsigned short)j;
|
||||
}
|
||||
for (size_t j = 0; j < UTF_LST_LEN; ++j) {
|
||||
utf_tbl[utf_lst[j].c].cletter = 1;
|
||||
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
|
||||
utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
|
||||
}
|
||||
return 0;
|
||||
utf_tbl = (unicode_info2*)malloc(CONTSIZE * sizeof(unicode_info2));
|
||||
if (utf_tbl) {
|
||||
size_t j;
|
||||
for (j = 0; j < CONTSIZE; j++) {
|
||||
utf_tbl[j].cletter = 0;
|
||||
utf_tbl[j].clower = (unsigned short)j;
|
||||
utf_tbl[j].cupper = (unsigned short)j;
|
||||
}
|
||||
for (j = 0; j < UTF_LST_LEN; j++) {
|
||||
utf_tbl[utf_lst[j].c].cletter = 1;
|
||||
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
|
||||
utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
|
||||
}
|
||||
} else
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
@ -2446,7 +2646,7 @@ void free_utf_tbl() {
|
||||
if (utf_tbl_count > 0)
|
||||
utf_tbl_count--;
|
||||
if (utf_tbl && (utf_tbl_count == 0)) {
|
||||
delete[] utf_tbl;
|
||||
free(utf_tbl);
|
||||
utf_tbl = NULL;
|
||||
}
|
||||
}
|
||||
@ -2575,6 +2775,18 @@ size_t remove_ignored_chars_utf(std::string& word,
|
||||
return w2.size();
|
||||
}
|
||||
|
||||
namespace {
// Predicate functor: operator() returns true for every character that is
// contained in the string given at construction.
class is_any_of {
 public:
  // explicit: a std::string must not silently convert into a predicate.
  explicit is_any_of(const std::string& in) : chars(in) {}

  // const: testing a character does not modify the predicate, so it can
  // also be called through const objects and references.
  bool operator()(char c) const { return chars.find(c) != std::string::npos; }

 private:
  std::string chars;  // the set of characters to match
};
}
|
||||
|
||||
// strip all ignored characters in the string
|
||||
size_t remove_ignored_chars(std::string& word,
|
||||
const std::string& ignored_chars) {
|
||||
@ -2584,48 +2796,54 @@ size_t remove_ignored_chars(std::string& word,
|
||||
return word.size();
|
||||
}
|
||||
|
||||
bool parse_string(const std::string& line, std::string& out, int ln) {
|
||||
if (!out.empty()) {
|
||||
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
|
||||
return false;
|
||||
}
|
||||
int parse_string(char* line, char** out, int ln) {
|
||||
char* tp = line;
|
||||
char* piece;
|
||||
int i = 0;
|
||||
int np = 0;
|
||||
std::string::const_iterator iter = line.begin();
|
||||
std::string::const_iterator start_piece = mystrsep(line, iter);
|
||||
while (start_piece != line.end()) {
|
||||
switch (i) {
|
||||
case 0: {
|
||||
np++;
|
||||
break;
|
||||
if (*out) {
|
||||
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
|
||||
return 1;
|
||||
}
|
||||
piece = mystrsep(&tp, 0);
|
||||
while (piece) {
|
||||
if (*piece != '\0') {
|
||||
switch (i) {
|
||||
case 0: {
|
||||
np++;
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
*out = mystrdup(piece);
|
||||
if (!*out)
|
||||
return 1;
|
||||
np++;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
}
|
||||
case 1: {
|
||||
out.assign(start_piece, iter);
|
||||
np++;
|
||||
break;
|
||||
}
|
||||
default:
|
||||
break;
|
||||
i++;
|
||||
}
|
||||
++i;
|
||||
start_piece = mystrsep(line, iter);
|
||||
// free(piece);
|
||||
piece = mystrsep(&tp, 0);
|
||||
}
|
||||
if (np != 2) {
|
||||
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);
|
||||
return false;
|
||||
return 1;
|
||||
}
|
||||
return true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool parse_array(const std::string& line,
|
||||
std::string& out,
|
||||
bool parse_array(char* line,
|
||||
char** out,
|
||||
std::vector<w_char>& out_utf16,
|
||||
int utf8,
|
||||
int ln) {
|
||||
if (!parse_string(line, out, ln))
|
||||
if (parse_string(line, out, ln))
|
||||
return false;
|
||||
if (utf8) {
|
||||
u8_u16(out_utf16, out);
|
||||
u8_u16(out_utf16, *out);
|
||||
std::sort(out_utf16.begin(), out_utf16.end());
|
||||
}
|
||||
return true;
|
||||
|
@ -71,14 +71,13 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef CSUTIL_HXX_
|
||||
#define CSUTIL_HXX_
|
||||
#ifndef __CSUTILHXX__
|
||||
#define __CSUTILHXX__
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
// First some base level utility routines
|
||||
|
||||
#include <fstream>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <string.h>
|
||||
@ -128,9 +127,8 @@
|
||||
#define FORBIDDENWORD 65510
|
||||
#define ONLYUPCASEFLAG 65511
|
||||
|
||||
// fix long pathname problem of WIN32 by using w_char std::fstream::open override
|
||||
LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
|
||||
std::ios_base::openmode mode);
|
||||
// fopen or optional _wfopen to fix long pathname problem of WIN32
|
||||
LIBHUNSPELL_DLL_EXPORTED FILE* myfopen(const char* path, const char* mode);
|
||||
|
||||
// convert UTF-16 characters to UTF-8
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
|
||||
@ -141,16 +139,21 @@ LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
|
||||
const std::string& src);
|
||||
|
||||
// remove end of line char(s)
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char* s);
|
||||
|
||||
// duplicate string
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
|
||||
|
||||
// strcat for limited length destination string
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrcat(char* dest, const char* st, int max);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
|
||||
std::string::const_iterator& start);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrsep(char** sptr, const char delim);
|
||||
|
||||
// replace pat by rep in word and return word
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrrep(char* word,
|
||||
const char* pat,
|
||||
const char* rep);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
|
||||
const std::string& search,
|
||||
const std::string& replace);
|
||||
@ -160,13 +163,13 @@ LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
|
||||
const std::string& apd);
|
||||
|
||||
// tokenize into lines with new line
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
|
||||
char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char* text,
|
||||
char*** lines,
|
||||
char breakchar);
|
||||
|
||||
// tokenize into lines with new line and uniq in place
|
||||
LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* line_uniq(char* text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* line_uniq_app(char** text, char breakchar);
|
||||
|
||||
// reverse word
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
|
||||
@ -175,7 +178,10 @@ LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
|
||||
|
||||
// remove duplicates
|
||||
LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
|
||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char** list, int n);
|
||||
|
||||
// free character array list
|
||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char*** list, int n);
|
||||
|
||||
// character encoding information
|
||||
struct cs_info {
|
||||
@ -184,7 +190,7 @@ struct cs_info {
|
||||
unsigned char cupper;
|
||||
};
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
|
||||
int langnum);
|
||||
@ -194,13 +200,13 @@ LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
|
||||
int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const char* es);
|
||||
|
||||
// get language identifiers of language codes
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char* lang);
|
||||
|
||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* get_casechars(const char* enc);
|
||||
|
||||
// convert std::string to all caps
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
|
||||
@ -250,12 +256,10 @@ LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
|
||||
std::string& word,
|
||||
const std::string& ignored_chars);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
|
||||
std::string& out,
|
||||
int ln);
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char* line, char** out, int ln);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
|
||||
std::string& out,
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_array(char* line,
|
||||
char** out,
|
||||
std::vector<w_char>& out_utf16,
|
||||
int utf8,
|
||||
int ln);
|
||||
@ -266,6 +270,10 @@ LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
|
||||
const std::string& morph,
|
||||
const std::string& var);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char* s, const char* t);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char* morph);
|
||||
|
||||
// conversion function for protected memory
|
||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
|
||||
|
||||
|
@ -86,33 +86,33 @@ int FileMgr::fail(const char* err, const char* par) {
|
||||
FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {
|
||||
in[0] = '\0';
|
||||
|
||||
myopen(fin, file, std::ios_base::in);
|
||||
if (!fin.is_open()) {
|
||||
fin = myfopen(file, "r");
|
||||
if (!fin) {
|
||||
// check hzipped file
|
||||
std::string st(file);
|
||||
st.append(HZIP_EXTENSION);
|
||||
hin = new Hunzip(st.c_str(), key);
|
||||
}
|
||||
if (!fin.is_open() && !hin->is_open())
|
||||
if (!fin && !hin)
|
||||
fail(MSG_OPEN, file);
|
||||
}
|
||||
|
||||
FileMgr::~FileMgr() {
|
||||
delete hin;
|
||||
if (fin)
|
||||
fclose(fin);
|
||||
if (hin)
|
||||
delete hin;
|
||||
}
|
||||
|
||||
bool FileMgr::getline(std::string& dest) {
|
||||
bool ret = false;
|
||||
++linenum;
|
||||
if (fin.is_open()) {
|
||||
ret = static_cast<bool>(std::getline(fin, dest));
|
||||
} else if (hin->is_open()) {
|
||||
ret = hin->getline(dest);
|
||||
}
|
||||
if (!ret) {
|
||||
--linenum;
|
||||
}
|
||||
return ret;
|
||||
char* FileMgr::getline() {
|
||||
const char* l;
|
||||
linenum++;
|
||||
if (fin)
|
||||
return fgets(in, BUFSIZE - 1, fin);
|
||||
if (hin && ((l = hin->getline()) != NULL))
|
||||
return strcpy(in, l);
|
||||
linenum--;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int FileMgr::getlinenum() {
|
||||
|
@ -72,21 +72,21 @@
|
||||
*/
|
||||
|
||||
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
||||
#ifndef FILEMGR_HXX_
|
||||
#define FILEMGR_HXX_
|
||||
#ifndef _FILEMGR_HXX_
|
||||
#define _FILEMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
|
||||
class FileMgr {
|
||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr {
|
||||
private:
|
||||
FileMgr(const FileMgr&);
|
||||
FileMgr& operator=(const FileMgr&);
|
||||
|
||||
protected:
|
||||
std::ifstream fin;
|
||||
FILE* fin;
|
||||
Hunzip* hin;
|
||||
char in[BUFSIZE + 50]; // input buffer
|
||||
int fail(const char* err, const char* par);
|
||||
@ -95,7 +95,7 @@ class FileMgr {
|
||||
public:
|
||||
FileMgr(const char* filename, const char* key = NULL);
|
||||
~FileMgr();
|
||||
bool getline(std::string&);
|
||||
char* getline();
|
||||
int getlinenum();
|
||||
};
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -71,8 +71,10 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef HASHMGR_HXX_
|
||||
#define HASHMGR_HXX_
|
||||
#ifndef _HASHMGR_HXX_
|
||||
#define _HASHMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
@ -84,7 +86,7 @@
|
||||
|
||||
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
||||
|
||||
class HashMgr {
|
||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr {
|
||||
int tablesize;
|
||||
struct hentry** tableptr;
|
||||
flag flag_mode;
|
||||
@ -92,10 +94,10 @@ class HashMgr {
|
||||
int utf8;
|
||||
unsigned short forbiddenword;
|
||||
int langnum;
|
||||
std::string enc;
|
||||
std::string lang;
|
||||
char* enc;
|
||||
char* lang;
|
||||
struct cs_info* csconv;
|
||||
std::string ignorechars;
|
||||
char* ignorechars;
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
int numaliasf; // flag vector `compression' with aliases
|
||||
unsigned short** aliasf;
|
||||
@ -112,35 +114,35 @@ class HashMgr {
|
||||
struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
|
||||
|
||||
int add(const std::string& word);
|
||||
int add_with_affix(const std::string& word, const std::string& pattern);
|
||||
int remove(const std::string& word);
|
||||
int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
|
||||
bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
|
||||
unsigned short decode_flag(const char* flag) const;
|
||||
char* encode_flag(unsigned short flag) const;
|
||||
int is_aliasf() const;
|
||||
int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
|
||||
int is_aliasm() const;
|
||||
char* get_aliasm(int index) const;
|
||||
int add_with_affix(const char* word, const char* pattern);
|
||||
int remove(const char* word);
|
||||
int decode_flags(unsigned short** result, char* flags, FileMgr* af);
|
||||
unsigned short decode_flag(const char* flag);
|
||||
char* encode_flag(unsigned short flag);
|
||||
int is_aliasf();
|
||||
int get_aliasf(int index, unsigned short** fvec, FileMgr* af);
|
||||
int is_aliasm();
|
||||
char* get_aliasm(int index);
|
||||
|
||||
private:
|
||||
int get_clen_and_captype(const std::string& word, int* captype);
|
||||
int load_tables(const char* tpath, const char* key);
|
||||
int add_word(const std::string& word,
|
||||
int add_word(const char* word,
|
||||
int wbl,
|
||||
int wcl,
|
||||
unsigned short* ap,
|
||||
int al,
|
||||
const std::string* desc,
|
||||
const char* desc,
|
||||
bool onlyupcase);
|
||||
int load_config(const char* affpath, const char* key);
|
||||
bool parse_aliasf(const std::string& line, FileMgr* af);
|
||||
int parse_aliasf(char* line, FileMgr* af);
|
||||
int add_hidden_capitalized_word(const std::string& word,
|
||||
int wcl,
|
||||
unsigned short* flags,
|
||||
int al,
|
||||
const std::string* dp,
|
||||
char* dp,
|
||||
int captype);
|
||||
bool parse_aliasm(const std::string& line, FileMgr* af);
|
||||
int parse_aliasm(char* line, FileMgr* af);
|
||||
int remove_forbidden_flag(const std::string& word);
|
||||
};
|
||||
|
||||
|
@ -38,8 +38,8 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef HTYPES_HXX_
|
||||
#define HTYPES_HXX_
|
||||
#ifndef _HTYPES_HXX_
|
||||
#define _HTYPES_HXX_
|
||||
|
||||
#define ROTATE_LEN 5
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -38,8 +38,8 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef MYSPELLMGR_H_
|
||||
#define MYSPELLMGR_H_
|
||||
#ifndef _MYSPELLMGR_H_
|
||||
#define _MYSPELLMGR_H_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
@ -68,7 +68,7 @@ LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED const char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
|
@ -70,29 +70,26 @@
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
#ifndef MYSPELLMGR_HXX_
|
||||
#define MYSPELLMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
#include "w_char.hxx"
|
||||
#include <string>
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "suggestmgr.hxx"
|
||||
#include "langnum.hxx"
|
||||
#include <vector>
|
||||
|
||||
#define SPELL_XML "<?xml?>"
|
||||
|
||||
#define MAXDIC 20
|
||||
#define MAXSUGGESTION 15
|
||||
#define MAXSHARPS 5
|
||||
#define MAXWORDLEN 176
|
||||
|
||||
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
|
||||
# define H_DEPRECATED __attribute__((__deprecated__))
|
||||
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
|
||||
# define H_DEPRECATED __declspec(deprecated)
|
||||
#else
|
||||
# define H_DEPRECATED
|
||||
#endif
|
||||
#define HUNSPELL_OK (1 << 0)
|
||||
#define HUNSPELL_OK_WARN (1 << 1)
|
||||
|
||||
class HunspellImpl;
|
||||
#ifndef _MYSPELLMGR_HXX_
|
||||
#define _MYSPELLMGR_HXX_
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
private:
|
||||
@ -100,7 +97,17 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
Hunspell& operator=(const Hunspell&);
|
||||
|
||||
private:
|
||||
HunspellImpl* m_Impl;
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
int maxdic;
|
||||
SuggestMgr* pSMgr;
|
||||
char* affixpath;
|
||||
char* encoding;
|
||||
struct cs_info* csconv;
|
||||
int langnum;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
char** wordbreak;
|
||||
|
||||
public:
|
||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||
@ -111,6 +118,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
* long path names (without the long path prefix Hunspell will use fopen()
|
||||
* with system-dependent character encoding instead of _wfopen()).
|
||||
*/
|
||||
|
||||
Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
|
||||
~Hunspell();
|
||||
|
||||
@ -118,7 +126,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
int add_dic(const char* dpath, const char* key = NULL);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: false = bad word, true = good word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*
|
||||
* plus output:
|
||||
* info: information bit array, fields:
|
||||
@ -126,8 +134,8 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
* SPELL_FORBIDDEN = an explicit forbidden word
|
||||
* root: root (stem), when input is a word with affix(es)
|
||||
*/
|
||||
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
|
||||
H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL);
|
||||
|
||||
int spell(const char* word, int* info = NULL, char** root = NULL);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
@ -136,8 +144,8 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
std::vector<std::string> suggest(const std::string& word);
|
||||
H_DEPRECATED int suggest(char*** slst, const char* word);
|
||||
|
||||
int suggest(char*** slst, const char* word);
|
||||
|
||||
/* Suggest words from suffix rules
|
||||
* suffix_suggest(suggestions, root_word)
|
||||
@ -147,37 +155,36 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
* a newly allocated array of strings (*slts will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
std::vector<std::string> suffix_suggest(const std::string& root_word);
|
||||
H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word);
|
||||
int suffix_suggest(char*** slst, const char* root_word);
|
||||
|
||||
/* deallocate suggestion lists */
|
||||
H_DEPRECATED void free_list(char*** slst, int n);
|
||||
|
||||
const std::string& get_dict_encoding() const;
|
||||
H_DEPRECATED const char* get_dic_encoding() const;
|
||||
void free_list(char*** slst, int n);
|
||||
|
||||
char* get_dic_encoding();
|
||||
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
std::vector<std::string> analyze(const std::string& word);
|
||||
H_DEPRECATED int analyze(char*** slst, const char* word);
|
||||
|
||||
/* stem(word) - stemmer function */
|
||||
std::vector<std::string> stem(const std::string& word);
|
||||
H_DEPRECATED int stem(char*** slst, const char* word);
|
||||
int analyze(char*** slst, const char* word);
|
||||
|
||||
/* stem(analysis, n) - get stems from a morph. analysis
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
int stem(char*** slst, const char* word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = analyze(&result, "words");
|
||||
* int n2 = stem(&result2, result, n1);
|
||||
*/
|
||||
std::vector<std::string> stem(const std::vector<std::string>& morph);
|
||||
H_DEPRECATED int stem(char*** slst, char** morph, int n);
|
||||
|
||||
int stem(char*** slst, char** morph, int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
std::vector<std::string> generate(const std::string& word, const std::string& word2);
|
||||
H_DEPRECATED int generate(char*** slst, const char* word, const char* word2);
|
||||
|
||||
int generate(char*** slst, const char* word, const char* word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
@ -186,38 +193,66 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
* int n = generate(&result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
|
||||
H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n);
|
||||
|
||||
int generate(char*** slst, const char* word, char** desc, int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
int add(const std::string& word);
|
||||
int add(const char* word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
int add_with_affix(const std::string& word, const std::string& example);
|
||||
int add_with_affix(const char* word, const char* example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
int remove(const std::string& word);
|
||||
int remove(const char* word);
|
||||
|
||||
/* other */
|
||||
|
||||
/* get extra word characters definied in affix file for tokenization */
|
||||
const std::string& get_wordchars() const;
|
||||
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||
const char* get_wordchars();
|
||||
const std::vector<w_char>& get_wordchars_utf16();
|
||||
|
||||
const std::string& get_version() const;
|
||||
struct cs_info* get_csconv();
|
||||
const char* get_version();
|
||||
|
||||
int get_langnum() const;
|
||||
|
||||
/* need for putdic */
|
||||
bool input_conv(const std::string& word, std::string& dest);
|
||||
int input_conv(const char* word, char* dest, size_t destsize);
|
||||
|
||||
private:
|
||||
void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev);
|
||||
size_t cleanword2(std::string& dest,
|
||||
std::vector<w_char>& dest_u,
|
||||
const char*,
|
||||
int* w_len,
|
||||
int* pcaptype,
|
||||
size_t* pabbrev);
|
||||
void mkinitcap(std::string& u8);
|
||||
int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
|
||||
int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
|
||||
void mkallcap(std::string& u8);
|
||||
int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
|
||||
struct hentry* checkword(const char*, int* info, char** root);
|
||||
std::string sharps_u8_l1(const std::string& source);
|
||||
hentry*
|
||||
spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root);
|
||||
int is_keepcase(const hentry* rv);
|
||||
int insert_sug(char*** slst, const char* word, int ns);
|
||||
void cat_result(std::string& result, char* st);
|
||||
char* stem_description(const char* desc);
|
||||
int spellml(char*** slst, const char* word);
|
||||
std::string get_xml_par(const char* par);
|
||||
const char* get_xml_pos(const char* s, const char* attr);
|
||||
int get_xml_list(char*** slst, const char* list, const char* tag);
|
||||
int check_xml_par(const char* q, const char* attr, const char* value);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
#ifndef HUNSPELL_VISIBILITY_H_
|
||||
#define HUNSPELL_VISIBILITY_H_
|
||||
#ifndef _HUNSPELL_VISIBILITY_H_
|
||||
#define _HUNSPELL_VISIBILITY_H_
|
||||
|
||||
#if defined(HUNSPELL_STATIC)
|
||||
# define LIBHUNSPELL_DLL_EXPORTED
|
||||
|
@ -59,7 +59,7 @@ int Hunzip::fail(const char* err, const char* par) {
|
||||
}
|
||||
|
||||
Hunzip::Hunzip(const char* file, const char* key)
|
||||
: bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0) {
|
||||
: fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL) {
|
||||
in[0] = out[0] = line[0] = '\0';
|
||||
filename = mystrdup(file);
|
||||
if (getcode(key) == -1)
|
||||
@ -70,19 +70,19 @@ Hunzip::Hunzip(const char* file, const char* key)
|
||||
|
||||
int Hunzip::getcode(const char* key) {
|
||||
unsigned char c[2];
|
||||
int i, j, n;
|
||||
int i, j, n, p;
|
||||
int allocatedbit = BASEBITREC;
|
||||
const char* enc = key;
|
||||
|
||||
if (!filename)
|
||||
return -1;
|
||||
|
||||
myopen(fin, filename, std::ios_base::in | std::ios_base::binary);
|
||||
if (!fin.is_open())
|
||||
fin = myfopen(filename, "rb");
|
||||
if (!fin)
|
||||
return -1;
|
||||
|
||||
// read magic number
|
||||
if (!fin.read(in, 3) ||
|
||||
if ((fread(in, 1, 3, fin) < MAGICLEN) ||
|
||||
!(strncmp(MAGIC, in, MAGICLEN) == 0 ||
|
||||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
@ -93,7 +93,7 @@ int Hunzip::getcode(const char* key) {
|
||||
unsigned char cs;
|
||||
if (!key)
|
||||
return fail(MSG_KEY, filename);
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 1))
|
||||
if (fread(&c, 1, 1, fin) < 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
for (cs = 0; *enc; enc++)
|
||||
cs ^= *enc;
|
||||
@ -104,7 +104,7 @@ int Hunzip::getcode(const char* key) {
|
||||
key = NULL;
|
||||
|
||||
// read record count
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 2))
|
||||
if (fread(&c, 1, 2, fin) < 2)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
|
||||
if (key) {
|
||||
@ -115,14 +115,16 @@ int Hunzip::getcode(const char* key) {
|
||||
}
|
||||
|
||||
n = ((int)c[0] << 8) + c[1];
|
||||
dec.resize(BASEBITREC);
|
||||
dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit));
|
||||
if (!dec)
|
||||
return fail(MSG_MEMORY, filename);
|
||||
dec[0].v[0] = 0;
|
||||
dec[0].v[1] = 0;
|
||||
|
||||
// read codes
|
||||
for (i = 0; i < n; i++) {
|
||||
unsigned char l;
|
||||
if (!fin.read(reinterpret_cast<char*>(c), 2))
|
||||
if (fread(c, 1, 2, fin) < 2)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
@ -132,14 +134,14 @@ int Hunzip::getcode(const char* key) {
|
||||
enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
if (!fin.read(reinterpret_cast<char*>(&l), 1))
|
||||
if (fread(&l, 1, 1, fin) < 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
l ^= *enc;
|
||||
}
|
||||
if (!fin.read(in, l / 8 + 1))
|
||||
if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key)
|
||||
for (j = 0; j <= l / 8; j++) {
|
||||
@ -147,7 +149,7 @@ int Hunzip::getcode(const char* key) {
|
||||
enc = key;
|
||||
in[j] ^= *enc;
|
||||
}
|
||||
int p = 0;
|
||||
p = 0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
@ -156,7 +158,7 @@ int Hunzip::getcode(const char* key) {
|
||||
lastbit++;
|
||||
if (lastbit == allocatedbit) {
|
||||
allocatedbit += BASEBITREC;
|
||||
dec.resize(allocatedbit);
|
||||
dec = (struct bit*)realloc(dec, allocatedbit * sizeof(struct bit));
|
||||
}
|
||||
dec[lastbit].v[0] = 0;
|
||||
dec[lastbit].v[1] = 0;
|
||||
@ -171,6 +173,10 @@ int Hunzip::getcode(const char* key) {
|
||||
}
|
||||
|
||||
Hunzip::~Hunzip() {
|
||||
if (dec)
|
||||
free(dec);
|
||||
if (fin)
|
||||
fclose(fin);
|
||||
if (filename)
|
||||
free(filename);
|
||||
}
|
||||
@ -179,17 +185,16 @@ int Hunzip::getbuf() {
|
||||
int p = 0;
|
||||
int o = 0;
|
||||
do {
|
||||
if (inc == 0) {
|
||||
fin.read(in, BUFSIZE);
|
||||
inbits = fin.gcount() * 8;
|
||||
}
|
||||
if (inc == 0)
|
||||
inbits = fread(in, 1, BUFSIZE, fin) * 8;
|
||||
for (; inc < inbits; inc++) {
|
||||
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
if (oldp == lastbit) {
|
||||
fin.close();
|
||||
fclose(fin);
|
||||
fin = NULL;
|
||||
// add last odd byte
|
||||
if (dec[lastbit].c[0])
|
||||
out[o++] = dec[lastbit].c[1];
|
||||
@ -207,11 +212,11 @@ int Hunzip::getbuf() {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
bool Hunzip::getline(std::string& dest) {
|
||||
const char* Hunzip::getline() {
|
||||
char linebuf[BUFSIZE];
|
||||
int l = 0, eol = 0, left = 0, right = 0;
|
||||
if (bufsiz == -1)
|
||||
return false;
|
||||
return NULL;
|
||||
while (l < bufsiz && !eol) {
|
||||
linebuf[l++] = out[outc];
|
||||
switch (out[outc]) {
|
||||
@ -246,7 +251,7 @@ bool Hunzip::getline(std::string& dest) {
|
||||
}
|
||||
if (++outc == bufsiz) {
|
||||
outc = 0;
|
||||
bufsiz = fin.is_open() ? getbuf() : -1;
|
||||
bufsiz = fin ? getbuf() : -1;
|
||||
}
|
||||
}
|
||||
if (right)
|
||||
@ -254,6 +259,5 @@ bool Hunzip::getline(std::string& dest) {
|
||||
else
|
||||
linebuf[l] = '\0';
|
||||
strcpy(line + left, linebuf);
|
||||
dest.assign(line);
|
||||
return true;
|
||||
return line;
|
||||
}
|
||||
|
@ -41,14 +41,12 @@
|
||||
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||
|
||||
#ifndef HUNZIP_HXX_
|
||||
#define HUNZIP_HXX_
|
||||
#ifndef _HUNZIP_HXX_
|
||||
#define _HUNZIP_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <fstream>
|
||||
#include <vector>
|
||||
|
||||
#define BUFSIZE 65536
|
||||
#define HZIP_EXTENSION ".hz"
|
||||
@ -70,9 +68,9 @@ class LIBHUNSPELL_DLL_EXPORTED Hunzip {
|
||||
|
||||
protected:
|
||||
char* filename;
|
||||
std::ifstream fin;
|
||||
FILE* fin;
|
||||
int bufsiz, lastbit, inc, inbits, outc;
|
||||
std::vector<bit> dec; // code table
|
||||
struct bit* dec; // code table
|
||||
char in[BUFSIZE]; // input buffer
|
||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||
char line[BUFSIZE + 50]; // decoded line
|
||||
@ -83,8 +81,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunzip {
|
||||
public:
|
||||
Hunzip(const char* filename, const char* key = NULL);
|
||||
~Hunzip();
|
||||
bool is_open() { return fin.is_open(); }
|
||||
bool getline(std::string& dest);
|
||||
const char* getline();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -38,12 +38,12 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef LANGNUM_HXX_
|
||||
#define LANGNUM_HXX_
|
||||
#ifndef _LANGNUM_HXX_
|
||||
#define _LANGNUM_HXX_
|
||||
|
||||
/*
|
||||
language numbers for language specific codes
|
||||
see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199
|
||||
see http://l10n.openoffice.org/languages.html
|
||||
*/
|
||||
|
||||
enum {
|
||||
|
@ -36,13 +36,15 @@
|
||||
#include "phonet.hxx"
|
||||
|
||||
void init_phonet_hash(phonetable& parms) {
|
||||
for (int i = 0; i < HASHSIZE; i++) {
|
||||
int i, k;
|
||||
|
||||
for (i = 0; i < HASHSIZE; i++) {
|
||||
parms.hash[i] = -1;
|
||||
}
|
||||
|
||||
for (int i = 0; parms.rules[i][0] != '\0'; i += 2) {
|
||||
for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
|
||||
/** set hash value **/
|
||||
int k = (unsigned char)parms.rules[i][0];
|
||||
k = (unsigned char)parms.rules[i][0];
|
||||
|
||||
if (parms.hash[k] < 0) {
|
||||
parms.hash[k] = i;
|
||||
@ -71,8 +73,9 @@ static int myisalpha(char ch) {
|
||||
std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
|
||||
int i, k = 0, p, z;
|
||||
int k0, n0, p0 = -333;
|
||||
int k0, n0, p0 = -333, z0;
|
||||
char c;
|
||||
const char* s;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
size_t len = inword.size();
|
||||
@ -87,15 +90,15 @@ std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
i = z = 0;
|
||||
while ((c = word[i]) != '\0') {
|
||||
int n = parms.hash[(uchar)c];
|
||||
int z0 = 0;
|
||||
z0 = 0;
|
||||
|
||||
if (n >= 0 && !parms.rules[n].empty()) {
|
||||
if (n >= 0) {
|
||||
/** check all rules for the same letter **/
|
||||
while (parms.rules[n][0] == c) {
|
||||
/** check whole string **/
|
||||
k = 1; /** number of found letters **/
|
||||
p = 5; /** default priority **/
|
||||
const char*s = parms.rules[n].c_str();
|
||||
s = parms.rules[n];
|
||||
s++; /** important for (see below) "*(s-1)" **/
|
||||
|
||||
while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
|
||||
@ -139,13 +142,13 @@ std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
n0 = parms.hash[(uchar)c0];
|
||||
|
||||
// if (parms.followup && k > 1 && n0 >= 0
|
||||
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0' && !parms.rules[n0].empty()) {
|
||||
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') {
|
||||
/** test follow-up rule for "word[i+k]" **/
|
||||
while (parms.rules[n0][0] == c0) {
|
||||
/** check whole string **/
|
||||
k0 = k;
|
||||
p0 = 5;
|
||||
s = parms.rules[n0].c_str();
|
||||
s = parms.rules[n0];
|
||||
s++;
|
||||
while (*s != '\0' && word[i + k0] == *s &&
|
||||
!isdigit((unsigned char)*s) &&
|
||||
@ -203,9 +206,9 @@ std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
} /** end of follow-up stuff **/
|
||||
|
||||
/** replace string **/
|
||||
s = parms.rules[n + 1].c_str();
|
||||
p0 = (!parms.rules[n].empty() &&
|
||||
strchr(parms.rules[n].c_str() + 1, '<') != NULL)
|
||||
s = parms.rules[n + 1];
|
||||
p0 = (parms.rules[n][0] != '\0' &&
|
||||
strchr(parms.rules[n] + 1, '<') != NULL)
|
||||
? 1
|
||||
: 0;
|
||||
if (p0 == 1 && z == 0) {
|
||||
@ -238,8 +241,8 @@ std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
}
|
||||
/** new "actual letter" **/
|
||||
c = *s;
|
||||
if (!parms.rules[n].empty() &&
|
||||
strstr(parms.rules[n].c_str() + 1, "^^") != NULL) {
|
||||
if (parms.rules[n][0] != '\0' &&
|
||||
strstr(parms.rules[n] + 1, "^^") != NULL) {
|
||||
if (c != '\0') {
|
||||
target.push_back(c);
|
||||
}
|
||||
|
@ -27,8 +27,8 @@
|
||||
Porting from Aspell to Hunspell using C-like structs
|
||||
*/
|
||||
|
||||
#ifndef PHONET_HXX_
|
||||
#define PHONET_HXX_
|
||||
#ifndef __PHONETHXX__
|
||||
#define __PHONETHXX__
|
||||
|
||||
#define HASHSIZE 256
|
||||
#define MAXPHONETLEN 256
|
||||
@ -38,7 +38,9 @@
|
||||
|
||||
struct phonetable {
|
||||
char utf8;
|
||||
std::vector<std::string> rules;
|
||||
cs_info* lang;
|
||||
int num;
|
||||
char** rules;
|
||||
int hash[HASHSIZE];
|
||||
};
|
||||
|
||||
|
@ -90,122 +90,104 @@ RepList::RepList(int n) {
|
||||
|
||||
RepList::~RepList() {
|
||||
for (int i = 0; i < pos; i++) {
|
||||
delete dat[i];
|
||||
free(dat[i]->pattern);
|
||||
free(dat[i]->pattern2);
|
||||
free(dat[i]);
|
||||
}
|
||||
free(dat);
|
||||
}
|
||||
|
||||
int RepList::get_pos() {
|
||||
return pos;
|
||||
}
|
||||
|
||||
replentry* RepList::item(int n) {
|
||||
return dat[n];
|
||||
}
|
||||
|
||||
int RepList::find(const char* word) {
|
||||
int RepList::near(const char* word) {
|
||||
int p1 = 0;
|
||||
int p2 = pos - 1;
|
||||
while (p1 <= p2) {
|
||||
int p2 = pos;
|
||||
while ((p2 - p1) > 1) {
|
||||
int m = (p1 + p2) / 2;
|
||||
int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size());
|
||||
if (c < 0)
|
||||
p2 = m - 1;
|
||||
else if (c > 0)
|
||||
p1 = m + 1;
|
||||
else { // scan back for a longer match
|
||||
for (p1 = m - 1; p1 >= 0; --p1)
|
||||
if (!strncmp(word, dat[p1]->pattern.c_str(), dat[p1]->pattern.size()))
|
||||
m = p1;
|
||||
else if (dat[p1]->pattern.size() < dat[m]->pattern.size())
|
||||
break;
|
||||
return m;
|
||||
}
|
||||
int c = strcmp(word, dat[m]->pattern);
|
||||
if (c <= 0) {
|
||||
if (c < 0)
|
||||
p2 = m;
|
||||
else
|
||||
p1 = p2 = m;
|
||||
} else
|
||||
p1 = m;
|
||||
}
|
||||
return -1;
|
||||
return p1;
|
||||
}
|
||||
|
||||
std::string RepList::replace(const char* word, int ind, bool atstart) {
|
||||
int type = atstart ? 1 : 0;
|
||||
if (ind < 0)
|
||||
return std::string();
|
||||
if (strlen(word) == dat[ind]->pattern.size())
|
||||
type = atstart ? 3 : 2;
|
||||
while (type && dat[ind]->outstrings[type].empty())
|
||||
type = (type == 2 && !atstart) ? 0 : type - 1;
|
||||
return dat[ind]->outstrings[type];
|
||||
}
|
||||
|
||||
int RepList::add(const std::string& in_pat1, const std::string& pat2) {
|
||||
if (pos >= size || in_pat1.empty() || pat2.empty()) {
|
||||
return 1;
|
||||
}
|
||||
// analyse word context
|
||||
int type = 0;
|
||||
std::string pat1(in_pat1);
|
||||
if (pat1[0] == '_') {
|
||||
pat1.erase(0, 1);
|
||||
type = 1;
|
||||
}
|
||||
if (!pat1.empty() && pat1[pat1.size() - 1] == '_') {
|
||||
type = type + 2;
|
||||
pat1.erase(pat1.size() - 1);
|
||||
}
|
||||
mystrrep(pat1, "_", " ");
|
||||
|
||||
// find existing entry
|
||||
int m = find(pat1.c_str());
|
||||
if (m >= 0 && dat[m]->pattern == pat1) {
|
||||
// since already used
|
||||
dat[m]->outstrings[type] = pat2;
|
||||
mystrrep(dat[m]->outstrings[type], "_", " ");
|
||||
return 0;
|
||||
}
|
||||
|
||||
// make a new entry if none exists
|
||||
replentry* r = new replentry;
|
||||
if (r == NULL)
|
||||
return 1;
|
||||
r->pattern = pat1;
|
||||
r->outstrings[type] = pat2;
|
||||
mystrrep(r->outstrings[type], "_", " ");
|
||||
dat[pos++] = r;
|
||||
// sort to the right place in the list
|
||||
int i;
|
||||
for (i = pos - 1; i > 0; i--) {
|
||||
int c = strncmp(r->pattern.c_str(), dat[i-1]->pattern.c_str(), dat[i-1]->pattern.size());
|
||||
if (c > 0)
|
||||
break;
|
||||
else if (c == 0) { // subpatterns match. Patterns can't be identical since would catch earlier
|
||||
for (int j = i - 2; j > 0 && !strncmp(dat[i-1]->pattern.c_str(), dat[j]->pattern.c_str(), dat[i-1]->pattern.size()); --j)
|
||||
if (dat[j]->pattern.size() > r->pattern.size() ||
|
||||
(dat[j]->pattern.size() == r->pattern.size() && strncmp(dat[j]->pattern.c_str(), r->pattern.c_str(), r->pattern.size()) > 0)) {
|
||||
i = j;
|
||||
break;
|
||||
}
|
||||
break;
|
||||
}
|
||||
}
|
||||
memmove(dat + i + 1, dat + i, (pos - i - 1) * sizeof(replentry *));
|
||||
dat[i] = r;
|
||||
int RepList::match(const char* word, int n) {
|
||||
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0)
|
||||
return strlen(dat[n]->pattern);
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool RepList::conv(const std::string& in_word, std::string& dest) {
|
||||
int RepList::add(char* pat1, char* pat2) {
|
||||
if (pos >= size || pat1 == NULL || pat2 == NULL)
|
||||
return 1;
|
||||
replentry* r = (replentry*)malloc(sizeof(replentry));
|
||||
if (r == NULL)
|
||||
return 1;
|
||||
r->pattern = mystrrep(pat1, "_", " ");
|
||||
r->pattern2 = mystrrep(pat2, "_", " ");
|
||||
r->start = false;
|
||||
r->end = false;
|
||||
dat[pos++] = r;
|
||||
for (int i = pos - 1; i > 0; i--) {
|
||||
r = dat[i];
|
||||
if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
|
||||
dat[i] = dat[i - 1];
|
||||
dat[i - 1] = r;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RepList::conv(const char* word, char* dest, size_t destsize) {
|
||||
size_t stl = 0;
|
||||
int change = 0;
|
||||
for (size_t i = 0; i < strlen(word); i++) {
|
||||
int n = near(word + i);
|
||||
int l = match(word + i, n);
|
||||
if (l) {
|
||||
size_t replen = strlen(dat[n]->pattern2);
|
||||
if (stl + replen >= destsize)
|
||||
return -1;
|
||||
strcpy(dest + stl, dat[n]->pattern2);
|
||||
stl += replen;
|
||||
i += l - 1;
|
||||
change = 1;
|
||||
} else {
|
||||
if (stl + 1 >= destsize)
|
||||
return -1;
|
||||
dest[stl++] = word[i];
|
||||
}
|
||||
}
|
||||
dest[stl] = '\0';
|
||||
return change;
|
||||
}
|
||||
|
||||
bool RepList::conv(const char* word, std::string& dest) {
|
||||
dest.clear();
|
||||
|
||||
size_t wordlen = in_word.size();
|
||||
const char* word = in_word.c_str();
|
||||
|
||||
bool change = false;
|
||||
for (size_t i = 0; i < wordlen; ++i) {
|
||||
int n = find(word + i);
|
||||
std::string l = replace(word + i, n, i == 0);
|
||||
if (!l.empty()) {
|
||||
dest.append(l);
|
||||
i += dat[n]->pattern.size() - 1;
|
||||
for (size_t i = 0; i < strlen(word); i++) {
|
||||
int n = near(word + i);
|
||||
int l = match(word + i, n);
|
||||
if (l) {
|
||||
dest.append(dat[n]->pattern2);
|
||||
i += l - 1;
|
||||
change = true;
|
||||
} else {
|
||||
dest.push_back(word[i]);
|
||||
}
|
||||
}
|
||||
|
||||
return change;
|
||||
}
|
||||
|
||||
|
@ -72,15 +72,17 @@
|
||||
*/
|
||||
|
||||
/* string replacement list class */
|
||||
#ifndef REPLIST_HXX_
|
||||
#define REPLIST_HXX_
|
||||
#ifndef _REPLIST_HXX_
|
||||
#define _REPLIST_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "w_char.hxx"
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
class RepList {
|
||||
class LIBHUNSPELL_DLL_EXPORTED RepList {
|
||||
private:
|
||||
RepList(const RepList&);
|
||||
RepList& operator=(const RepList&);
|
||||
@ -91,13 +93,15 @@ class RepList {
|
||||
int pos;
|
||||
|
||||
public:
|
||||
explicit RepList(int n);
|
||||
RepList(int n);
|
||||
~RepList();
|
||||
|
||||
int add(const std::string& pat1, const std::string& pat2);
|
||||
int get_pos();
|
||||
int add(char* pat1, char* pat2);
|
||||
replentry* item(int n);
|
||||
int find(const char* word);
|
||||
std::string replace(const char* word, int n, bool atstart);
|
||||
bool conv(const std::string& word, std::string& dest);
|
||||
int near(const char* word);
|
||||
int match(const char* word, int n);
|
||||
int conv(const char* word, char* dest, size_t destsize);
|
||||
bool conv(const char* word, std::string& dest);
|
||||
};
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -71,8 +71,8 @@
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef SUGGESTMGR_HXX_
|
||||
#define SUGGESTMGR_HXX_
|
||||
#ifndef _SUGGESTMGR_HXX_
|
||||
#define _SUGGESTMGR_HXX_
|
||||
|
||||
#define MAX_ROOTS 100
|
||||
#define MAX_WORDS 100
|
||||
@ -91,6 +91,8 @@
|
||||
#define NGRAM_LOWERING (1 << 2)
|
||||
#define NGRAM_WEIGHTED (1 << 3)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "hashmgr.hxx"
|
||||
@ -99,22 +101,22 @@
|
||||
|
||||
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||
|
||||
class SuggestMgr {
|
||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {
|
||||
private:
|
||||
SuggestMgr(const SuggestMgr&);
|
||||
SuggestMgr& operator=(const SuggestMgr&);
|
||||
|
||||
private:
|
||||
char* ckey;
|
||||
size_t ckeyl;
|
||||
std::vector<w_char> ckey_utf;
|
||||
int ckeyl;
|
||||
w_char* ckey_utf;
|
||||
|
||||
char* ctry;
|
||||
size_t ctryl;
|
||||
std::vector<w_char> ctry_utf;
|
||||
int ctryl;
|
||||
w_char* ctry_utf;
|
||||
|
||||
AffixMgr* pAMgr;
|
||||
unsigned int maxSug;
|
||||
int maxSug;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int langnum;
|
||||
@ -124,53 +126,62 @@ class SuggestMgr {
|
||||
int complexprefixes;
|
||||
|
||||
public:
|
||||
SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
|
||||
SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr);
|
||||
~SuggestMgr();
|
||||
|
||||
void suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
|
||||
void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr);
|
||||
int suggest(char*** slst, const char* word, int nsug, int* onlycmpdsug);
|
||||
int ngsuggest(char** wlst, const char* word, int ns, HashMgr** pHMgr, int md);
|
||||
int suggest_auto(char*** slst, const char* word, int nsug);
|
||||
int suggest_stems(char*** slst, const char* word, int nsug);
|
||||
int suggest_pos_stems(char*** slst, const char* word, int nsug);
|
||||
|
||||
std::string suggest_morph(const std::string& word);
|
||||
std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
|
||||
char* suggest_morph(const char* word);
|
||||
char* suggest_gen(char** pl, int pln, const char* pattern);
|
||||
char* suggest_morph_for_spelling_error(const char* word);
|
||||
|
||||
private:
|
||||
void testsug(std::vector<std::string>& wlst,
|
||||
const std::string& candidate,
|
||||
int cpdsuggest,
|
||||
int* timer,
|
||||
clock_t* timelimit);
|
||||
int checkword(const std::string& word, int, int*, clock_t*);
|
||||
int testsug(char** wlst,
|
||||
const char* candidate,
|
||||
int wl,
|
||||
int ns,
|
||||
int cpdsuggest,
|
||||
int* timer,
|
||||
clock_t* timelimit);
|
||||
int checkword(const char*, int, int, int*, clock_t*);
|
||||
int check_forbidden(const char*, int);
|
||||
|
||||
void capchars(std::vector<std::string>&, const char*, int);
|
||||
int replchars(std::vector<std::string>&, const char*, int);
|
||||
int doubletwochars(std::vector<std::string>&, const char*, int);
|
||||
int forgotchar(std::vector<std::string>&, const char*, int);
|
||||
int swapchar(std::vector<std::string>&, const char*, int);
|
||||
int longswapchar(std::vector<std::string>&, const char*, int);
|
||||
int movechar(std::vector<std::string>&, const char*, int);
|
||||
int extrachar(std::vector<std::string>&, const char*, int);
|
||||
int badcharkey(std::vector<std::string>&, const char*, int);
|
||||
int badchar(std::vector<std::string>&, const char*, int);
|
||||
int twowords(std::vector<std::string>&, const char*, int);
|
||||
int capchars(char**, const char*, int, int);
|
||||
int replchars(char**, const char*, int, int);
|
||||
int doubletwochars(char**, const char*, int, int);
|
||||
int forgotchar(char**, const char*, int, int);
|
||||
int swapchar(char**, const char*, int, int);
|
||||
int longswapchar(char**, const char*, int, int);
|
||||
int movechar(char**, const char*, int, int);
|
||||
int extrachar(char**, const char*, int, int);
|
||||
int badcharkey(char**, const char*, int, int);
|
||||
int badchar(char**, const char*, int, int);
|
||||
int twowords(char**, const char*, int, int);
|
||||
int fixstems(char**, const char*, int);
|
||||
|
||||
void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
|
||||
int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||
int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
|
||||
int capchars_utf(char**, const w_char*, int wl, int, int);
|
||||
int doubletwochars_utf(char**, const w_char*, int wl, int, int);
|
||||
int forgotchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int extrachar_utf(char**, const w_char*, int wl, int, int);
|
||||
int badcharkey_utf(char**, const w_char*, int wl, int, int);
|
||||
int badchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int swapchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int longswapchar_utf(char**, const w_char*, int, int, int);
|
||||
int movechar_utf(char**, const w_char*, int, int, int);
|
||||
|
||||
int mapchars(std::vector<std::string>&, const char*, int);
|
||||
int mapchars(char**, const char*, int, int);
|
||||
int map_related(const char*,
|
||||
std::string&,
|
||||
int,
|
||||
std::vector<std::string>& wlst,
|
||||
char** wlst,
|
||||
int,
|
||||
int,
|
||||
const mapentry*,
|
||||
int,
|
||||
const std::vector<mapentry>&,
|
||||
int*,
|
||||
clock_t*);
|
||||
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
|
||||
@ -181,7 +192,7 @@ class SuggestMgr {
|
||||
void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
|
||||
int lcslen(const char* s, const char* s2);
|
||||
int lcslen(const std::string& s, const std::string& s2);
|
||||
std::string suggest_hentry_gen(hentry* rv, const char* pattern);
|
||||
char* suggest_hentry_gen(hentry* rv, const char* pattern);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -38,10 +38,8 @@
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef W_CHAR_HXX_
|
||||
#define W_CHAR_HXX_
|
||||
|
||||
#include <string>
|
||||
#ifndef __WCHARHXX__
|
||||
#define __WCHARHXX__
|
||||
|
||||
#ifndef GCC
|
||||
struct w_char {
|
||||
@ -68,8 +66,10 @@ struct __attribute__((packed)) w_char {
|
||||
|
||||
// two character arrays
|
||||
struct replentry {
|
||||
std::string pattern;
|
||||
std::string outstrings[4]; // med, ini, fin, isol
|
||||
char* pattern;
|
||||
char* pattern2;
|
||||
bool start;
|
||||
bool end;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -5,7 +5,7 @@
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
include('/ipc/chromium/chromium-config.mozbuild')
|
||||
UNIFIED_SOURCES += [
|
||||
SOURCES += [
|
||||
'mozEnglishWordUtils.cpp',
|
||||
'mozInlineSpellChecker.cpp',
|
||||
'mozInlineSpellWordUtil.cpp',
|
||||
|
Loading…
Reference in New Issue
Block a user