Backed out changeset 25c7efa29d66 (bug 1318955) for breaking Hungarian spellchecking.

--HG--
extra : rebase_source : 8267bc48037eaf64d26ff93d60cd7998cb26d0ac
This commit is contained in:
Ryan VanderMeulen 2016-11-28 21:44:06 -05:00
parent d570415e50
commit 906fc553b2
32 changed files with 4166 additions and 3533 deletions

View File

@ -4,7 +4,7 @@
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
UNIFIED_SOURCES += [
SOURCES += [
'mozHunspell.cpp',
'mozHunspellDirProvider.cpp',
'RemoteSpellCheckEngineChild.cpp',

View File

@ -200,7 +200,7 @@ NS_IMETHODIMP mozHunspell::SetDictionary(const char16_t *aDictionary)
if (!mHunspell)
return NS_ERROR_OUT_OF_MEMORY;
nsAutoCString label(mHunspell->get_dict_encoding().c_str());
nsDependentCString label(mHunspell->get_dic_encoding());
nsAutoCString encoding;
if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
return NS_ERROR_UCONV_NOCONV;
@ -480,8 +480,7 @@ mozHunspell::LoadDictionariesFromDir(nsIFile* aDir)
return NS_OK;
}
nsresult
mozHunspell::ConvertCharset(const char16_t* aStr, std::string* aDst)
nsresult mozHunspell::ConvertCharset(const char16_t* aStr, char ** aDst)
{
NS_ENSURE_ARG_POINTER(aDst);
NS_ENSURE_TRUE(mEncoder, NS_ERROR_NULL_POINTER);
@ -491,13 +490,12 @@ mozHunspell::ConvertCharset(const char16_t* aStr, std::string* aDst)
nsresult rv = mEncoder->GetMaxLength(aStr, inLength, &outLength);
NS_ENSURE_SUCCESS(rv, rv);
aDst->resize(outLength);
*aDst = (char *) moz_xmalloc(sizeof(char) * (outLength+1));
NS_ENSURE_TRUE(*aDst, NS_ERROR_OUT_OF_MEMORY);
char* dst = &aDst->operator[](0);
rv = mEncoder->Convert(aStr, &inLength, dst, &outLength);
if (NS_SUCCEEDED(rv)) {
aDst->resize(outLength);
}
rv = mEncoder->Convert(aStr, &inLength, *aDst, &outLength);
if (NS_SUCCEEDED(rv))
(*aDst)[outLength] = '\0';
return rv;
}
@ -520,11 +518,12 @@ NS_IMETHODIMP mozHunspell::Check(const char16_t *aWord, bool *aResult)
NS_ENSURE_ARG_POINTER(aResult);
NS_ENSURE_TRUE(mHunspell, NS_ERROR_FAILURE);
std::string charsetWord;
nsresult rv = ConvertCharset(aWord, &charsetWord);
nsXPIDLCString charsetWord;
nsresult rv = ConvertCharset(aWord, getter_Copies(charsetWord));
NS_ENSURE_SUCCESS(rv, rv);
*aResult = mHunspell->spell(charsetWord);
*aResult = !!mHunspell->spell(charsetWord);
if (!*aResult && mPersonalDictionary)
rv = mPersonalDictionary->Check(aWord, mLanguage.get(), aResult);
@ -541,12 +540,12 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
nsresult rv;
*aSuggestionCount = 0;
std::string charsetWord;
rv = ConvertCharset(aWord, &charsetWord);
nsXPIDLCString charsetWord;
rv = ConvertCharset(aWord, getter_Copies(charsetWord));
NS_ENSURE_SUCCESS(rv, rv);
std::vector<std::string> suggestions = mHunspell->suggest(charsetWord);
*aSuggestionCount = static_cast<uint32_t>(suggestions.size());
char ** wlst;
*aSuggestionCount = mHunspell->suggest(&wlst, charsetWord);
if (*aSuggestionCount) {
*aSuggestions = (char16_t **)moz_xmalloc(*aSuggestionCount * sizeof(char16_t *));
@ -554,15 +553,15 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
uint32_t index = 0;
for (index = 0; index < *aSuggestionCount && NS_SUCCEEDED(rv); ++index) {
// Convert the suggestion to utf16
int32_t inLength = suggestions[index].size();
int32_t inLength = strlen(wlst[index]);
int32_t outLength;
rv = mDecoder->GetMaxLength(suggestions[index].c_str(), inLength, &outLength);
rv = mDecoder->GetMaxLength(wlst[index], inLength, &outLength);
if (NS_SUCCEEDED(rv))
{
(*aSuggestions)[index] = (char16_t *) moz_xmalloc(sizeof(char16_t) * (outLength+1));
if ((*aSuggestions)[index])
{
rv = mDecoder->Convert(suggestions[index].c_str(), &inLength, (*aSuggestions)[index], &outLength);
rv = mDecoder->Convert(wlst[index], &inLength, (*aSuggestions)[index], &outLength);
if (NS_SUCCEEDED(rv))
(*aSuggestions)[index][outLength] = 0;
}
@ -578,6 +577,7 @@ NS_IMETHODIMP mozHunspell::Suggest(const char16_t *aWord, char16_t ***aSuggestio
rv = NS_ERROR_OUT_OF_MEMORY;
}
NS_FREE_XPCOM_ALLOCATED_POINTER_ARRAY(*aSuggestionCount, wlst);
return rv;
}

View File

@ -99,7 +99,7 @@ public:
void LoadDictionaryList(bool aNotifyChildProcesses);
// helper method for converting a word to the charset of the dictionary
nsresult ConvertCharset(const char16_t* aStr, std::string* aDst);
nsresult ConvertCharset(const char16_t* aStr, char ** aDst);
NS_DECL_NSIMEMORYREPORTER

View File

@ -1,2 +1,2 @@
Hunspell Version: 1.5.0
Hunspell Version: 1.4.1
Additional Patches: See patches directory.

View File

@ -79,7 +79,33 @@
#include "affentry.hxx"
#include "csutil.hxx"
AffEntry::~AffEntry() {
// Construct a prefix entry from a parsed affix record.
//
// pmgr : affix manager that this entry registers with (stored in pmyMgr).
// dp   : source record; its fields are copied into this entry. Pointer
//        members (c.l.conds2, morphcode, contclass) are copied by pointer
//        value, not duplicated, so afterwards this entry and *dp refer to the
//        same buffers.
//        NOTE(review): the destructor frees those buffers, so the caller
//        presumably must not free them again - confirm against the caller.
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
    : pmyMgr(pmgr),
      next(NULL),
      nexteq(NULL),
      nextne(NULL),
      flgnxt(NULL) {
  // option bits first: they select the condition layout below
  opts = dp->opts;          // cross product flag
  numconds = dp->numconds;  // length of the condition
  aflag = dp->aflag;        // flag
  appnd = dp->appnd;        // string to append
  strip = dp->strip;        // string to strip

  // copy the conditions; c.conds and c.l.conds1 are members of one union
  const bool longCondition = (opts & aeLONGCOND) != 0;
  if (!longCondition) {
    memcpy(c.conds, dp->c.conds, MAXCONDLEN);
  } else {
    // long form: inline first part plus a pointer to the remainder
    memcpy(c.conds, dp->c.l.conds1, MAXCONDLEN_1);
    c.l.conds2 = dp->c.l.conds2;
  }

  contclasslen = dp->contclasslen;
  contclass = dp->contclass;
  morphcode = dp->morphcode;
}
PfxEntry::~PfxEntry() {
aflag = 0;
pmyMgr = NULL;
if (opts & aeLONGCOND)
free(c.l.conds2);
if (morphcode && !(opts & aeALIASM))
@ -88,26 +114,17 @@ AffEntry::~AffEntry() {
free(contclass);
}
// Construct an empty prefix entry bound to the affix manager `pmgr`.
// All four link pointers (next, nexteq, nextne, flgnxt) start out NULL; no
// other member is touched by this constructor.
PfxEntry::PfxEntry(AffixMgr* pmgr)
// register affix manager
: pmyMgr(pmgr),
next(NULL),
nexteq(NULL),
nextne(NULL),
flgnxt(NULL) {
}
// add prefix to this word assuming conditions hold
std::string PfxEntry::add(const char* word, size_t len) {
std::string result;
char* PfxEntry::add(const char* word, size_t len) {
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
(len >= numconds) && test_condition(word) &&
(!strip.size() || (strncmp(word, strip.c_str(), strip.size()) == 0))) {
/* we have a match so add prefix */
result.assign(appnd);
result.append(word + strip.size());
std::string tword(appnd);
tword.append(word + strip.size());
return mystrdup(tword.c_str());
}
return result;
return NULL;
}
inline char* PfxEntry::nextchar(char* p) {
@ -259,7 +276,8 @@ struct hentry* PfxEntry::checkword(const char* word,
// if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag, in_compound);
NULL, 0, NULL, FLAG_NULL, needflag,
in_compound);
if (he)
return he;
}
@ -273,6 +291,8 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
int len,
char in_compound,
const FLAG needflag) {
struct hentry* he; // hash entry of root word or NULL
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
@ -304,9 +324,8 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
// hash entry of root word or NULL
struct hentry* he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
needflag);
he = pmyMgr->suffix_check_twosfx(tmpword.c_str(), tmpl, aeXPRODUCT, this,
needflag);
if (he)
return he;
}
@ -316,15 +335,15 @@ struct hentry* PfxEntry::check_twosfx(const char* word,
}
// check if this prefix entry matches
std::string PfxEntry::check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
std::string result;
char* PfxEntry::check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
// and if there are enough chars in root word and added back strip chars
// to meet the number of characters conditions, then test it
int tmpl = len - appnd.size(); // length of tmpword
if ((tmpl > 0 || (tmpl == 0 && pmyMgr->get_fullstrip())) &&
@ -351,21 +370,22 @@ std::string PfxEntry::check_twosfx_morph(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
result = pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
aeXPRODUCT,
this, needflag);
return pmyMgr->suffix_check_twosfx_morph(tmpword.c_str(), tmpl,
aeXPRODUCT,
this, needflag);
}
}
}
return result;
return NULL;
}
// check if this prefix entry matches
std::string PfxEntry::check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
std::string result;
char* PfxEntry::check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag) {
struct hentry* he; // hash entry of root word or NULL
char* st;
// on entry prefix is 0 length or already matches the beginning of the word.
// So if the remaining root word has positive length
@ -391,8 +411,9 @@ std::string PfxEntry::check_morph(const char* word,
// root word in the dictionary
if (test_condition(tmpword.c_str())) {
std::string result;
tmpl += strip.size();
struct hentry* he; // hash entry of root word or NULL
if ((he = pmyMgr->lookup(tmpword.c_str())) != NULL) {
do {
if (TESTAFF(he->astr, aflag, he->alen) &&
@ -434,19 +455,23 @@ std::string PfxEntry::check_morph(const char* word,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (!st.empty()) {
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (st) {
result.append(st);
free(st);
}
}
if (!result.empty())
return mystrdup(result.c_str());
}
}
return result;
return NULL;
}
SfxEntry::SfxEntry(AffixMgr* pmgr)
SfxEntry::SfxEntry(AffixMgr* pmgr, affentry* dp)
: pmyMgr(pmgr) // register affix manager
,
next(NULL),
@ -456,21 +481,50 @@ SfxEntry::SfxEntry(AffixMgr* pmgr)
l_morph(NULL),
r_morph(NULL),
eq_morph(NULL) {
// set up its initial values
aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
numconds = dp->numconds; // length of the condition
opts = dp->opts; // cross product flag
// then copy over all of the conditions
if (opts & aeLONGCOND) {
memcpy(c.l.conds1, dp->c.l.conds1, MAXCONDLEN_1);
c.l.conds2 = dp->c.l.conds2;
} else
memcpy(c.conds, dp->c.conds, MAXCONDLEN);
rappnd = appnd;
reverseword(rappnd);
morphcode = dp->morphcode;
contclass = dp->contclass;
contclasslen = dp->contclasslen;
}
// Destroy a suffix entry and release the heap buffers it holds.
//
// c.l.conds2 is freed only when the long-condition layout (aeLONGCOND) is in
// use. morphcode and contclass are freed only when they are non-NULL and the
// matching option bit (aeALIASM / aeALIASF) is clear.
// NOTE(review): presumably the alias bits mean the buffer belongs to a shared
// alias table rather than to this entry - confirm.
SfxEntry::~SfxEntry() {
  // drop the manager link and clear the flag
  pmyMgr = NULL;
  aflag = 0;

  const bool longCondition = (opts & aeLONGCOND) != 0;
  if (longCondition) {
    free(c.l.conds2);
  }

  if (morphcode != NULL && (opts & aeALIASM) == 0) {
    free(morphcode);
  }

  if (contclass != NULL && (opts & aeALIASF) == 0) {
    free(contclass);
  }
}
// add suffix to this word assuming conditions hold
std::string SfxEntry::add(const char* word, size_t len) {
std::string result;
char* SfxEntry::add(const char* word, size_t len) {
/* make sure all conditions match */
if ((len > strip.size() || (len == 0 && pmyMgr->get_fullstrip())) &&
(len >= numconds) && test_condition(word + len, word) &&
(!strip.size() ||
(strcmp(word + len - strip.size(), strip.c_str()) == 0))) {
result.assign(word);
std::string tword(word);
/* we have a match so add suffix */
result.replace(len - strip.size(), std::string::npos, appnd);
tword.replace(len - strip.size(), std::string::npos, appnd);
return mystrdup(tword.c_str());
}
return result;
return NULL;
}
inline char* SfxEntry::nextchar(char* p) {
@ -615,6 +669,9 @@ struct hentry* SfxEntry::checkword(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
char** wlst,
int maxSug,
int* ns,
const FLAG cclass,
const FLAG needflag,
const FLAG badflag) {
@ -685,6 +742,27 @@ struct hentry* SfxEntry::checkword(const char* word,
return he;
he = he->next_homonym; // check homonyms
} while (he);
// obsolete stemming code (used only by the
// experimental SuffixMgr:suggest_pos_stems)
// store resulting root in wlst
} else if (wlst && (*ns < maxSug)) {
int cwrd = 1;
for (int k = 0; k < *ns; k++)
if (strcmp(tmpword, wlst[k]) == 0) {
cwrd = 0;
break;
}
if (cwrd) {
wlst[*ns] = mystrdup(tmpword);
if (wlst[*ns] == NULL) {
for (int j = 0; j < *ns; j++)
free(wlst[j]);
*ns = -1;
return NULL;
}
(*ns)++;
}
}
}
}
@ -697,6 +775,7 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
int optflags,
PfxEntry* ppfx,
const FLAG needflag) {
struct hentry* he; // hash entry pointer
PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
@ -734,18 +813,17 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
// if all conditions are met then recall suffix_check
if (test_condition(end, beg)) {
struct hentry* he; // hash entry pointer
if (ppfx) {
// handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
(FLAG)aflag, needflag, IN_CPD_NOT);
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
(FLAG)aflag, needflag);
else
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx,
(FLAG)aflag, needflag, IN_CPD_NOT);
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, optflags, ppfx, NULL, 0,
NULL, (FLAG)aflag, needflag);
} else {
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL,
(FLAG)aflag, needflag, IN_CPD_NOT);
he = pmyMgr->suffix_check(tmpword.c_str(), tmpl, 0, NULL, NULL, 0, NULL,
(FLAG)aflag, needflag);
}
if (he)
return he;
@ -755,20 +833,23 @@ struct hentry* SfxEntry::check_twosfx(const char* word,
}
// see if two-level suffix is present in the word
std::string SfxEntry::check_twosfx_morph(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
const FLAG needflag) {
char* SfxEntry::check_twosfx_morph(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
const FLAG needflag) {
PfxEntry* ep = ppfx;
char* st;
std::string result;
char result[MAXLNLEN];
*result = '\0';
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
if ((optflags & aeXPRODUCT) != 0 && (opts & aeXPRODUCT) == 0)
return result;
return NULL;
// upon entry suffix is 0 length or already matches the end of the word.
// So if the remaining root word has positive length
@ -802,34 +883,40 @@ std::string SfxEntry::check_twosfx_morph(const char* word,
if (ppfx) {
// handle conditional suffix
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
needflag);
if (!st.empty()) {
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag,
needflag);
if (st) {
if (ppfx->getMorph()) {
result.append(ppfx->getMorph());
result.append(" ");
mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
}
result.append(st);
mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
} else {
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
needflag);
if (!st.empty()) {
result.append(st);
st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, optflags, ppfx, aflag,
needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
}
} else {
std::string st = pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
if (!st.empty()) {
result.append(st);
st =
pmyMgr->suffix_check_morph(tmpword.c_str(), tmpl, 0, NULL, aflag, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
free(st);
mychomp(result);
}
}
if (*result)
return mystrdup(result);
}
}
return result;
return NULL;
}
// get next homonym with same affix
@ -861,11 +948,6 @@ struct hentry* SfxEntry::get_next_homonym(struct hentry* he,
return NULL;
}
// Refresh rappnd from appnd: copy the append string, then pass the copy to
// reverseword() (defined elsewhere; presumably reverses it in place - confirm).
void SfxEntry::initReverseWord() {
rappnd = appnd;
reverseword(rappnd);
}
#if 0
Appendix: Understanding Affix Code

View File

@ -71,8 +71,10 @@
* SUCH DAMAGE.
*/
#ifndef AFFIX_HXX_
#define AFFIX_HXX_
#ifndef _AFFIX_HXX_
#define _AFFIX_HXX_
#include "hunvisapi.h"
#include "atypes.hxx"
#include "baseaffix.hxx"
@ -80,7 +82,7 @@
/* A Prefix Entry */
class PfxEntry : public AffEntry {
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry {
private:
PfxEntry(const PfxEntry&);
PfxEntry& operator=(const PfxEntry&);
@ -94,9 +96,10 @@ class PfxEntry : public AffEntry {
PfxEntry* flgnxt;
public:
explicit PfxEntry(AffixMgr* pmgr);
PfxEntry(AffixMgr* pmgr, affentry* dp);
~PfxEntry();
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
struct hentry* checkword(const char* word,
int len,
char in_compound,
@ -107,19 +110,19 @@ class PfxEntry : public AffEntry {
char in_compound,
const FLAG needflag = FLAG_NULL);
std::string check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
char* check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
std::string check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
char* check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
FLAG getFlag() { return aflag; }
const char* getKey() { return appnd.c_str(); }
std::string add(const char* word, size_t len);
inline FLAG getFlag() { return aflag; }
inline const char* getKey() { return appnd.c_str(); }
char* add(const char* word, size_t len);
inline short getKeyLen() { return appnd.size(); }
@ -144,7 +147,7 @@ class PfxEntry : public AffEntry {
/* A Suffix Entry */
class SfxEntry : public AffEntry {
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {
private:
SfxEntry(const SfxEntry&);
SfxEntry& operator=(const SfxEntry&);
@ -163,16 +166,20 @@ class SfxEntry : public AffEntry {
SfxEntry* eq_morph;
public:
explicit SfxEntry(AffixMgr* pmgr);
SfxEntry(AffixMgr* pmgr, affentry* dp);
~SfxEntry();
bool allowCross() const { return ((opts & aeXPRODUCT) != 0); }
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
struct hentry* checkword(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
const FLAG cclass,
const FLAG needflag,
const FLAG badflag);
char** wlst,
int maxSug,
int* ns,
const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL,
const FLAG badflag = FLAG_NULL);
struct hentry* check_twosfx(const char* word,
int len,
@ -180,11 +187,11 @@ class SfxEntry : public AffEntry {
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
std::string check_twosfx_morph(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
char* check_twosfx_morph(const char* word,
int len,
int optflags,
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
struct hentry* get_next_homonym(struct hentry* he);
struct hentry* get_next_homonym(struct hentry* word,
int optflags,
@ -192,9 +199,9 @@ class SfxEntry : public AffEntry {
const FLAG cclass,
const FLAG needflag);
FLAG getFlag() { return aflag; }
const char* getKey() { return rappnd.c_str(); }
std::string add(const char* word, size_t len);
inline FLAG getFlag() { return aflag; }
inline const char* getKey() { return rappnd.c_str(); }
char* add(const char* word, size_t len);
inline const char* getMorph() { return morphcode; }
@ -217,7 +224,6 @@ class SfxEntry : public AffEntry {
inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
void initReverseWord();
inline char* nextchar(char* p);
inline int test_condition(const char* st, const char* begin);

File diff suppressed because it is too large Load Diff

View File

@ -71,13 +71,14 @@
* SUCH DAMAGE.
*/
#ifndef AFFIXMGR_HXX_
#define AFFIXMGR_HXX_
#ifndef _AFFIXMGR_HXX_
#define _AFFIXMGR_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#include <string>
#include <vector>
#include "atypes.hxx"
#include "baseaffix.hxx"
@ -92,16 +93,17 @@
class PfxEntry;
class SfxEntry;
class AffixMgr {
class LIBHUNSPELL_DLL_EXPORTED AffixMgr {
PfxEntry* pStart[SETSIZE];
SfxEntry* sStart[SETSIZE];
PfxEntry* pFlag[SETSIZE];
SfxEntry* sFlag[SETSIZE];
const std::vector<HashMgr*>& alldic;
const HashMgr* pHMgr;
std::string keystring;
std::string trystring;
std::string encoding;
HashMgr* pHMgr;
HashMgr** alldic;
int* maxdic;
char* keystring;
char* trystring;
char* encoding;
struct cs_info* csconv;
int utf8;
int complexprefixes;
@ -123,19 +125,19 @@ class AffixMgr {
FLAG nongramsuggest;
FLAG needaffix;
int cpdmin;
bool parsedrep;
std::vector<replentry> reptable;
int numrep;
replentry* reptable;
RepList* iconvtable;
RepList* oconvtable;
bool parsedmaptable;
std::vector<mapentry> maptable;
bool parsedbreaktable;
std::vector<std::string> breaktable;
bool parsedcheckcpd;
std::vector<patentry> checkcpdtable;
int nummap;
mapentry* maptable;
int numbreak;
char** breaktable;
int numcheckcpd;
patentry* checkcpdtable;
int simplifiedcpd;
bool parseddefcpd;
std::vector<flagentry> defcpdtable;
int numdefcpd;
flagentry* defcpdtable;
phonetable* phone;
int maxngramsugs;
int maxcpdsugs;
@ -145,9 +147,10 @@ class AffixMgr {
int sugswithdots;
int cpdwordmax;
int cpdmaxsyllable;
std::string cpdvowels; // vowels (for calculating of Hungarian compounding limit,
std::vector<w_char> cpdvowels_utf16; //vowels for UTF-8 encoding
std::string cpdsyllablenum; // syllable count incrementing flag
char* cpdvowels;
w_char* cpdvowels_utf16;
int cpdvowels_utf16_len;
char* cpdsyllablenum;
const char* pfxappnd; // BUG: not stateless
const char* sfxappnd; // BUG: not stateless
int sfxextra; // BUG: not stateless
@ -156,12 +159,12 @@ class AffixMgr {
SfxEntry* sfx; // BUG: not stateless
PfxEntry* pfx; // BUG: not stateless
int checknum;
std::string wordchars; // letters + spec. word characters
char* wordchars;
std::vector<w_char> wordchars_utf16;
std::string ignorechars; // letters + spec. word characters
char* ignorechars;
std::vector<w_char> ignorechars_utf16;
std::string version; // affix and dictionary file version string
std::string lang; // language
char* version;
char* lang;
int langnum;
FLAG lemma_present;
FLAG circumfix;
@ -179,7 +182,7 @@ class AffixMgr {
// affix)
public:
AffixMgr(const char* affpath, const std::vector<HashMgr*>& ptr, const char* key = NULL);
AffixMgr(const char* affpath, HashMgr** ptr, int* md, const char* key = NULL);
~AffixMgr();
struct hentry* affix_check(const char* word,
int len,
@ -199,6 +202,9 @@ class AffixMgr {
int len,
int sfxopts,
PfxEntry* ppfx,
char** wlst,
int maxSug,
int* ns,
const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
@ -208,39 +214,39 @@ class AffixMgr {
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
std::string affix_check_morph(const char* word,
int len,
const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
std::string prefix_check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
std::string suffix_check_morph(const char* word,
int len,
int sfxopts,
PfxEntry* ppfx,
const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
char* affix_check_morph(const char* word,
int len,
const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
char* prefix_check_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
char* suffix_check_morph(const char* word,
int len,
int sfxopts,
PfxEntry* ppfx,
const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
std::string prefix_check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
std::string suffix_check_twosfx_morph(const char* word,
int len,
int sfxopts,
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
char* prefix_check_twosfx_morph(const char* word,
int len,
char in_compound,
const FLAG needflag = FLAG_NULL);
char* suffix_check_twosfx_morph(const char* word,
int len,
int sfxopts,
PfxEntry* ppfx,
const FLAG needflag = FLAG_NULL);
std::string morphgen(const char* ts,
int wl,
const unsigned short* ap,
unsigned short al,
const char* morph,
const char* targetmorph,
int level);
char* morphgen(const char* ts,
int wl,
const unsigned short* ap,
unsigned short al,
const char* morph,
const char* targetmorph,
int level);
int expand_rootword(struct guessword* wlst,
int maxn,
@ -267,7 +273,8 @@ class AffixMgr {
int cpdcase_check(const char* word, int len);
inline int candidate_check(const char* word, int len);
void setcminmax(int* cmin, int* cmax, const char* word, int len);
struct hentry* compound_check(const std::string& word,
struct hentry* compound_check(const char* word,
int len,
short wordnum,
short numsyllable,
short maxwordnum,
@ -287,37 +294,47 @@ class AffixMgr {
hentry** words,
hentry** rwords,
char hu_mov_rule,
std::string& result,
const std::string* partresult);
char** result,
char* partresult);
std::vector<std::string> get_suffix_words(short unsigned* suff,
int get_suffix_words(short unsigned* suff,
int len,
const char* root_word);
const char* root_word,
char** slst);
struct hentry* lookup(const char* word);
const std::vector<replentry>& get_reptable() const;
int get_numrep() const;
struct replentry* get_reptable() const;
RepList* get_iconvtable() const;
RepList* get_oconvtable() const;
struct phonetable* get_phonetable() const;
const std::vector<mapentry>& get_maptable() const;
const std::vector<std::string>& get_breaktable() const;
const std::string& get_encoding();
int get_nummap() const;
struct mapentry* get_maptable() const;
int get_numbreak() const;
char** get_breaktable() const;
char* get_encoding();
int get_langnum() const;
char* get_key_string();
char* get_try_string() const;
const std::string& get_wordchars() const;
const char* get_wordchars() const;
const std::vector<w_char>& get_wordchars_utf16() const;
const char* get_ignore() const;
char* get_ignore() const;
const std::vector<w_char>& get_ignore_utf16() const;
int get_compound() const;
FLAG get_compoundflag() const;
FLAG get_compoundbegin() const;
FLAG get_forbiddenword() const;
FLAG get_nosuggest() const;
FLAG get_nongramsuggest() const;
FLAG get_needaffix() const;
FLAG get_onlyincompound() const;
FLAG get_compoundroot() const;
FLAG get_lemma_present() const;
int get_checknum() const;
const char* get_prefix() const;
const char* get_suffix() const;
const char* get_derived() const;
const std::string& get_version() const;
const char* get_version() const;
int have_contclass() const;
int get_utf8() const;
int get_complexprefixes() const;
@ -338,25 +355,26 @@ class AffixMgr {
private:
int parse_file(const char* affpath, const char* key);
bool parse_flag(const std::string& line, unsigned short* out, FileMgr* af);
bool parse_num(const std::string& line, int* out, FileMgr* af);
bool parse_cpdsyllable(const std::string& line, FileMgr* af);
bool parse_reptable(const std::string& line, FileMgr* af);
bool parse_convtable(const std::string& line,
int parse_flag(char* line, unsigned short* out, FileMgr* af);
int parse_num(char* line, int* out, FileMgr* af);
int parse_cpdsyllable(char* line, FileMgr* af);
int parse_reptable(char* line, FileMgr* af);
int parse_convtable(char* line,
FileMgr* af,
RepList** rl,
const std::string& keyword);
bool parse_phonetable(const std::string& line, FileMgr* af);
bool parse_maptable(const std::string& line, FileMgr* af);
bool parse_breaktable(const std::string& line, FileMgr* af);
bool parse_checkcpdtable(const std::string& line, FileMgr* af);
bool parse_defcpdtable(const std::string& line, FileMgr* af);
bool parse_affix(const std::string& line, const char at, FileMgr* af, char* dupflags);
const char* keyword);
int parse_phonetable(char* line, FileMgr* af);
int parse_maptable(char* line, FileMgr* af);
int parse_breaktable(char* line, FileMgr* af);
int parse_checkcpdtable(char* line, FileMgr* af);
int parse_defcpdtable(char* line, FileMgr* af);
int parse_affix(char* line, const char at, FileMgr* af, char* dupflags);
void reverse_condition(std::string&);
void debugflag(char* result, unsigned short flag);
std::string& debugflag(std::string& result, unsigned short flag);
int condlen(const char*);
int encodeit(AffEntry& entry, const char* cs);
int encodeit(affentry& entry, const char* cs);
int build_pfxtree(PfxEntry* pfxptr);
int build_sfxtree(SfxEntry* sfxptr);
int process_pfx_order();

View File

@ -38,8 +38,8 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef ATYPES_HXX_
#define ATYPES_HXX_
#ifndef _ATYPES_HXX_
#define _ATYPES_HXX_
#ifndef HUNSPELL_WARNING
#include <stdio.h>
@ -63,7 +63,7 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
#define SETSIZE 256
#define CONTSIZE 65536
// AffEntry options
// affentry options
#define aeXPRODUCT (1 << 0)
#define aeUTF8 (1 << 1)
#define aeALIASF (1 << 2)
@ -85,6 +85,8 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
#define SPELL_ORIGCAP (1 << 5)
#define SPELL_WARN (1 << 6)
#define MAXLNLEN 8192
#define MINCPDLEN 3
#define MAXCOMPOUND 10
#define MAXCONDLEN 20
@ -98,25 +100,46 @@ static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))
// Plain record describing one affix. The PfxEntry / SfxEntry constructors
// that take an `affentry*` copy every field of this record into the entry.
struct affentry {
// string to strip
std::string strip;
// string to append
std::string appnd;
// length of the condition
char numconds;
// option bits (the ae* defines, e.g. aeXPRODUCT, aeLONGCOND)
char opts;
// affix flag
unsigned short aflag;
// NOTE(review): presumably an array of continuation-class flags with
// contclasslen elements - confirm against the parser.
unsigned short* contclass;
short contclasslen;
// Condition storage. `conds` is the short form; when opts has aeLONGCOND
// set, the `l` layout is used instead (inline conds1 plus pointer conds2).
union {
char conds[MAXCONDLEN];
struct {
char conds1[MAXCONDLEN_1];
char* conds2;
} l;
} c;
// NOTE(review): presumably the morphological description string - confirm.
char* morphcode;
};
// One candidate word record (used in guessword arrays, e.g. by
// AffixMgr::expand_rootword).
// NOTE(review): field meanings are not visible here - presumably `word` is
// the candidate, `allow` a permission flag and `orig` the originating form;
// confirm against the users of this struct.
struct guessword {
char* word;
bool allow;
char* orig;
};
typedef std::vector<std::string> mapentry;
typedef std::vector<FLAG> flagentry;
// One entry of the map table: an array of C strings plus a length.
// NOTE(review): presumably `len` is the number of strings in `set` - confirm.
struct mapentry {
char** set;
int len;
};
// One entry of a flag table: an array of FLAG values plus a length.
// NOTE(review): presumably `len` is the number of flags in `def` - confirm.
struct flagentry {
FLAG* def;
int len;
};
struct patentry {
std::string pattern;
std::string pattern2;
std::string pattern3;
char* pattern;
char* pattern2;
char* pattern3;
FLAG cond;
FLAG cond2;
patentry()
: cond(FLAG_NULL)
, cond2(FLAG_NULL) {
}
};
#endif

View File

@ -38,17 +38,18 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef BASEAFF_HXX_
#define BASEAFF_HXX_
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
#include "hunvisapi.h"
#include <string>
class AffEntry {
class LIBHUNSPELL_DLL_EXPORTED AffEntry {
private:
AffEntry(const AffEntry&);
AffEntry& operator=(const AffEntry&);
public:
protected:
AffEntry()
: numconds(0),
opts(0),
@ -56,7 +57,6 @@ class AffEntry {
morphcode(0),
contclass(NULL),
contclasslen(0) {}
virtual ~AffEntry();
std::string appnd;
std::string strip;
unsigned char numconds;

View File

@ -76,7 +76,6 @@
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#include <sstream>
#include "csutil.hxx"
#include "atypes.hxx"
@ -123,24 +122,26 @@ static struct unicode_info2* utf_tbl = NULL;
static int utf_tbl_count =
0; // utf_tbl can be used by multiple Hunspell instances
void myopen(std::ifstream& stream, const char* path, std::ios_base::openmode mode)
{
#if defined(_WIN32) && defined(_MSC_VER)
FILE* myfopen(const char* path, const char* mode) {
#ifdef _WIN32
#define WIN32_LONG_PATH_PREFIX "\\\\?\\"
if (strncmp(path, WIN32_LONG_PATH_PREFIX, 4) == 0) {
int len = MultiByteToWideChar(CP_UTF8, 0, path, -1, NULL, 0);
wchar_t* buff = new wchar_t[len];
wchar_t* buff2 = new wchar_t[len];
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
if (_wfullpath(buff2, buff, len) != NULL) {
stream.open(buff2, mode);
wchar_t* buff = (wchar_t*)malloc(len * sizeof(wchar_t));
wchar_t* buff2 = (wchar_t*)malloc(len * sizeof(wchar_t));
FILE* f = NULL;
if (buff && buff2) {
MultiByteToWideChar(CP_UTF8, 0, path, -1, buff, len);
if (_wfullpath(buff2, buff, len) != NULL) {
f = _wfopen(buff2, (strcmp(mode, "r") == 0) ? L"r" : L"rb");
}
free(buff);
free(buff2);
}
delete [] buff;
delete [] buff2;
return f;
}
else
#endif
stream.open(path, mode);
return fopen(path, mode);
}
std::string& u16_u8(std::string& dest, const std::vector<w_char>& src) {
@ -217,7 +218,7 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {
case 0xd0: { // 2-byte UTF-8 codes
if ((*(u8 + 1) & 0xc0) == 0x80) {
u2.h = (*u8 & 0x1f) >> 2;
u2.l = (static_cast<unsigned char>(*u8) << 6) + (*(u8 + 1) & 0x3f);
u2.l = (*u8 << 6) + (*(u8 + 1) & 0x3f);
++u8;
} else {
HUNSPELL_WARNING(stderr,
@ -274,35 +275,34 @@ int u8_u16(std::vector<w_char>& dest, const std::string& src) {
return dest.size();
}
namespace {
class is_any_of {
public:
explicit is_any_of(const std::string& in) : chars(in) {}
// strip strings into token based on single char delimiter
// acts like strsep() but only uses a delim char and not
// a delim string
// default delimiter: white space characters
bool operator()(char c) { return chars.find(c) != std::string::npos; }
private:
std::string chars;
};
}
std::string::const_iterator mystrsep(const std::string &str,
std::string::const_iterator& start) {
std::string::const_iterator end = str.end();
is_any_of op(" \t");
// don't use isspace() here, the string can be in some random charset
// that's way different than the locale's
std::string::const_iterator sp = start;
while (sp != end && op(*sp))
++sp;
std::string::const_iterator dp = sp;
while (dp != end && !op(*dp))
++dp;
start = dp;
return sp;
// Destructive tokenizer in the spirit of strsep().
//   stringp: in/out cursor into a writable, NUL-terminated buffer; advanced
//            past the returned token.
//   delim:   the single delimiter character; when it is 0 the token ends at
//            the first space or tab instead.
// Returns the start of the next token (possibly an empty string when two
// delimiters are adjacent), or NULL once the cursor sits on the terminator.
char* mystrsep(char** stringp, const char delim) {
  char* token = *stringp;
  if (*token == '\0')
    return NULL;

  // Locate the delimiter that terminates this token.  isspace() is avoided on
  // purpose: the text may be in an arbitrary charset unrelated to the locale.
  char* cut = delim ? strchr(token, delim) : strpbrk(token, " \t");

  if (cut == NULL) {
    // last token: leave the cursor on the terminating NUL
    *stringp = token + strlen(token);
  } else {
    *cut = '\0';
    *stringp = cut + 1;
  }
  return token;
}
// replaces strdup with ansi version
@ -320,98 +320,142 @@ char* mystrdup(const char* s) {
return d;
}
// Bounded strcat: appends st to dest only when the combined text plus the
// terminating NUL fits into max bytes; otherwise dest is left untouched.
// A NULL dest or st is a no-op.  Always returns dest.
char* mystrcat(char* dest, const char* st, int max) {
  if (dest != NULL && st != NULL) {
    const int used = strlen(dest);
    const int extra = strlen(st);
    if (used + extra + 1 <= max)
      strcpy(dest + used, st);
  }
  return dest;
}
// remove cross-platform text line end characters
void mychomp(std::string& s) {
size_t k = s.size();
size_t newsize = k;
if ((k > 0) && ((s[k - 1] == '\r') || (s[k - 1] == '\n')))
--newsize;
if ((k > 1) && (s[k - 2] == '\r'))
--newsize;
s.resize(newsize);
// Strip one trailing line terminator from s in place.
// Handles "\n", "\r" and "\r\n" (a trailing "\r\r" is also removed, as
// before).  A '\r' is only removed when it is part of the trailing line end:
// a carriage return in front of an ordinary last character ("a\rb") is kept,
// so the text is no longer truncated in that case.
void mychomp(char* s) {
  size_t k = strlen(s);
  if ((k > 0) && ((s[k - 1] == '\r') || (s[k - 1] == '\n'))) {
    s[--k] = '\0';
    // second half of a two-character line end
    if ((k > 0) && (s[k - 1] == '\r'))
      s[k - 1] = '\0';
  }
}
// break text to lines
std::vector<std::string> line_tok(const std::string& text, char breakchar) {
std::vector<std::string> ret;
if (text.empty()) {
return ret;
// return number of lines
int line_tok(const char* text, char*** lines, char breakchar) {
int linenum = 0;
if (!text) {
return linenum;
}
char* dup = mystrdup(text);
char* p = strchr(dup, breakchar);
while (p) {
linenum++;
*p = '\0';
p++;
p = strchr(p, breakchar);
}
linenum++;
*lines = (char**)malloc(linenum * sizeof(char*));
if (!(*lines)) {
free(dup);
return 0;
}
std::stringstream ss(text);
std::string tok;
while(std::getline(ss, tok, breakchar)) {
if (!tok.empty()) {
ret.push_back(tok);
p = dup;
int l = 0;
for (int i = 0; i < linenum; i++) {
if (*p != '\0') {
(*lines)[l] = mystrdup(p);
if (!(*lines)[l]) {
for (i = 0; i < l; i++)
free((*lines)[i]);
free(dup);
return 0;
}
l++;
}
p += strlen(p) + 1;
}
return ret;
free(dup);
if (!l) {
free(*lines);
*lines = NULL;
}
return l;
}
// uniq line in place
void line_uniq(std::string& text, char breakchar)
{
std::vector<std::string> lines = line_tok(text, breakchar);
text.clear();
if (lines.empty()) {
return;
}
text = lines[0];
for (size_t i = 1; i < lines.size(); ++i) {
bool dup = false;
for (size_t j = 0; j < i; ++j) {
if (lines[i] == lines[j]) {
dup = true;
char* line_uniq(char* text, char breakchar) {
char** lines;
int linenum = line_tok(text, &lines, breakchar);
int i;
strcpy(text, lines[0]);
for (i = 1; i < linenum; i++) {
int dup = 0;
for (int j = 0; j < i; j++) {
if (strcmp(lines[i], lines[j]) == 0) {
dup = 1;
break;
}
}
if (!dup) {
if (!text.empty())
text.push_back(breakchar);
text.append(lines[i]);
if ((i > 1) || (*(lines[0]) != '\0')) {
sprintf(text + strlen(text), "%c", breakchar);
}
strcat(text, lines[i]);
}
}
for (i = 0; i < linenum; i++) {
free(lines[i]);
}
free(lines);
return text;
}
// uniq and boundary for compound analysis: "1\n\2\n\1" -> " ( \1 | \2 ) "
void line_uniq_app(std::string& text, char breakchar) {
if (text.find(breakchar) == std::string::npos) {
return;
char* line_uniq_app(char** text, char breakchar) {
if (!strchr(*text, breakchar)) {
return *text;
}
std::vector<std::string> lines = line_tok(text, breakchar);
text.clear();
if (lines.empty()) {
return;
}
text = lines[0];
for (size_t i = 1; i < lines.size(); ++i) {
bool dup = false;
for (size_t j = 0; j < i; ++j) {
if (lines[i] == lines[j]) {
dup = true;
char** lines;
int i;
int linenum = line_tok(*text, &lines, breakchar);
int dup = 0;
for (i = 0; i < linenum; i++) {
for (int j = 0; j < (i - 1); j++) {
if (strcmp(lines[i], lines[j]) == 0) {
*(lines[i]) = '\0';
dup++;
break;
}
}
if (!dup) {
if (!text.empty())
text.push_back(breakchar);
text.append(lines[i]);
}
if ((linenum - dup) == 1) {
strcpy(*text, lines[0]);
freelist(&lines, linenum);
return *text;
}
char* newtext = (char*)malloc(strlen(*text) + 2 * linenum + 3 + 1);
if (newtext) {
free(*text);
*text = newtext;
} else {
freelist(&lines, linenum);
return *text;
}
strcpy(*text, " ( ");
for (i = 0; i < linenum; i++)
if (*(lines[i])) {
sprintf(*text + strlen(*text), "%s%s", lines[i], " | ");
}
}
if (lines.size() == 1) {
text = lines[0];
return;
}
text.assign(" ( ");
for (size_t i = 0; i < lines.size(); ++i) {
text.append(lines[i]);
text.append(" | ");
}
text[text.size() - 2] = ')'; // " ) "
(*text)[strlen(*text) - 2] = ')'; // " ) "
freelist(&lines, linenum);
return *text;
}
// append s to ends of every lines in text
@ -425,6 +469,111 @@ std::string& strlinecat(std::string& str, const std::string& apd) {
return str;
}
// morphcmp(): compare MORPH_DERI_SFX, MORPH_INFL_SFX and MORPH_TERM_SFX fields
// in the first line of the inputs
// return 0, if inputs equal
// return 1, if inputs may equal with a secondary suffix
// (1 is also returned when either input pointer is NULL)
// otherwise return -1
int morphcmp(const char* s, const char* t) {
// se / te: set to 1 once the field value being compared in s / t has reached
// a separator (space, newline, tab or the terminating NUL)
int se = 0;
int te = 0;
// sl / tl: position of the first '\n' in s / t (NULL if there is none);
// matches located behind it are treated as not found
const char* sl;
const char* tl;
// olds / oldt: start position for the fallback searches; reset to NULL when
// the MORPH_TERM_SFX fallback had to be used
const char* olds;
const char* oldt;
if (!s || !t)
return 1;
// locate the first suffix field of s: try MORPH_DERI_SFX, then
// MORPH_INFL_SFX, finally MORPH_TERM_SFX
olds = s;
sl = strchr(s, '\n');
s = strstr(s, MORPH_DERI_SFX);
if (!s || (sl && sl < s))
s = strstr(olds, MORPH_INFL_SFX);
if (!s || (sl && sl < s)) {
s = strstr(olds, MORPH_TERM_SFX);
olds = NULL;
}
// same lookup for t
oldt = t;
tl = strchr(t, '\n');
t = strstr(t, MORPH_DERI_SFX);
if (!t || (tl && tl < t))
t = strstr(oldt, MORPH_INFL_SFX);
if (!t || (tl && tl < t)) {
t = strstr(oldt, MORPH_TERM_SFX);
oldt = NULL;
}
// walk both inputs field by field while each still has a field located
// before its first newline
while (s && t && (!sl || sl > s) && (!tl || tl > t)) {
// skip the tag itself (MORPH_TAG_LEN characters) to reach the field value
s += MORPH_TAG_LEN;
t += MORPH_TAG_LEN;
se = 0;
te = 0;
// advance over the common prefix of both values until one of them ends
while ((*s == *t) && !se && !te) {
s++;
t++;
switch (*s) {
case ' ':
case '\n':
case '\t':
case '\0':
se = 1;
}
switch (*t) {
case ' ':
case '\n':
case '\t':
case '\0':
te = 1;
}
}
// the values differ unless both ended at the same step
if (!se || !te) {
// not terminal suffix difference
if (olds)
return -1;
return 1;
}
// move on to the next suffix field of s, with the same fallback order
olds = s;
s = strstr(s, MORPH_DERI_SFX);
if (!s || (sl && sl < s))
s = strstr(olds, MORPH_INFL_SFX);
if (!s || (sl && sl < s)) {
s = strstr(olds, MORPH_TERM_SFX);
olds = NULL;
}
// ... and of t
oldt = t;
t = strstr(t, MORPH_DERI_SFX);
if (!t || (tl && tl < t))
t = strstr(oldt, MORPH_INFL_SFX);
if (!t || (tl && tl < t)) {
t = strstr(oldt, MORPH_TERM_SFX);
oldt = NULL;
}
}
// equal only when both inputs ran out of fields together and the last
// compared values ended at the same step
if (!s && !t && se && te)
return 0;
return 1;
}
// Count the suffix fields found in a morphological description.
// Starting at the beginning, the next field is the first MORPH_DERI_SFX
// occurrence; when there is none, the first MORPH_INFL_SFX; when there is
// none either, the first MORPH_TERM_SFX.  Each following search resumes one
// character behind the previous hit.  A NULL or empty input yields 0.
int get_sfxcount(const char* morph) {
  if (morph == NULL || *morph == '\0')
    return 0;

  int count = 0;
  const char* from = morph;
  for (;;) {
    const char* hit = strstr(from, MORPH_DERI_SFX);
    if (hit == NULL)
      hit = strstr(from, MORPH_INFL_SFX);
    if (hit == NULL)
      hit = strstr(from, MORPH_TERM_SFX);
    if (hit == NULL)
      break;
    ++count;
    from = hit + 1;
  }
  return count;
}
int fieldlen(const char* r) {
int n = 0;
while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') {
@ -466,6 +615,33 @@ std::string& mystrrep(std::string& str,
return str;
}
// Replace every occurrence of pat in word by rep, in place, and return word.
//   word: writable, NUL-terminated buffer.  When rep is longer than pat the
//         caller must provide enough room for the grown result; no bounds
//         check is (or can be) done here.
//   pat:  text to search for; an empty pattern leaves word unchanged.
//   rep:  replacement text.
// The search resumes right behind each inserted replacement, so text that was
// just written is never matched again.  This keeps the loop finite when rep
// itself contains pat.
char* mystrrep(char* word, const char* pat, const char* rep) {
  const size_t patlen = strlen(pat);
  if (patlen == 0)
    return word;  // strstr() matches "" everywhere: nothing sensible to do
  const size_t replen = strlen(rep);

  char* pos = strstr(word, pat);
  while (pos) {
    if (replen != patlen) {
      // shift the tail (including its terminating NUL) to its new place;
      // memmove copes with the overlapping ranges in both directions
      char* tail = pos + patlen;
      memmove(pos + replen, tail, strlen(tail) + 1);
    }
    memcpy(pos, rep, replen);
    pos = strstr(pos + replen, pat);
  }
  return word;
}
// reverse word
size_t reverseword(std::string& word) {
std::reverse(word.begin(), word.end());
@ -481,19 +657,35 @@ size_t reverseword_utf(std::string& word) {
return w.size();
}
void uniqlist(std::vector<std::string>& list) {
if (list.size() < 2)
return;
std::vector<std::string> ret;
ret.push_back(list[0]);
for (size_t i = 1; i < list.size(); ++i) {
if (std::find(ret.begin(), ret.end(), list[i]) == ret.end())
ret.push_back(list[i]);
// Remove duplicate strings from list[0..n) in place, keeping the first
// occurrence of each.  Duplicates are free()d and the survivors are packed to
// the front in their original order.  Returns the number of entries kept;
// slots at and behind that index hold stale pointers and must not be used.
// Lists with fewer than two entries are returned unchanged.
int uniqlist(char** list, int n) {
  if (n < 2)
    return n;

  // pass 1: release every entry that repeats an earlier (still present) one
  for (int cur = 0; cur < n; ++cur) {
    for (int prev = 0; prev < cur; ++prev) {
      if (!list[prev] || !list[cur] || strcmp(list[prev], list[cur]) != 0)
        continue;
      free(list[cur]);
      list[cur] = NULL;
      break;
    }
  }

  // pass 2: close the gaps left by the released entries (slot 0 stays put)
  int kept = 1;
  for (int cur = 1; cur < n; ++cur) {
    if (list[cur])
      list[kept++] = list[cur];
  }
  return kept;
}
list.swap(ret);
// Release a malloc()ed array of n malloc()ed strings and reset the caller's
// pointer to NULL.  A NULL list, or a list that is already NULL, is a no-op.
void freelist(char*** list, int n) {
  if (!list || !*list)
    return;
  char** items = *list;
  for (int idx = 0; idx < n; ++idx)
    free(items[idx]);
  free(items);
  *list = NULL;
}
namespace {
@ -2265,9 +2457,9 @@ static void toAsciiLowerAndRemoveNonAlphanumeric(const char* pName,
*pBuf = '\0';
}
struct cs_info* get_current_cs(const std::string& es) {
char* normalized_encoding = new char[es.size() + 1];
toAsciiLowerAndRemoveNonAlphanumeric(es.c_str(), normalized_encoding);
struct cs_info* get_current_cs(const char* es) {
char* normalized_encoding = new char[strlen(es) + 1];
toAsciiLowerAndRemoveNonAlphanumeric(es, normalized_encoding);
struct cs_info* ccs = NULL;
int n = sizeof(encds) / sizeof(encds[0]);
@ -2282,7 +2474,7 @@ struct cs_info* get_current_cs(const std::string& es) {
if (!ccs) {
HUNSPELL_WARNING(stderr,
"error: unknown encoding %s: using %s as fallback\n", es.c_str(),
"error: unknown encoding %s: using %s as fallback\n", es,
encds[0].enc_name);
ccs = encds[0].cs_table;
}
@ -2293,7 +2485,7 @@ struct cs_info* get_current_cs(const std::string& es) {
// XXX This function was rewritten for mozilla. Instead of storing the
// conversion tables static in this file, create them when needed
// with the help of the mozilla backend.
struct cs_info* get_current_cs(const std::string& es) {
struct cs_info* get_current_cs(const char* es) {
struct cs_info* ccs = new cs_info[256];
// Initialize the array with dummy data so that we wouldn't need
// to return null in case of failures.
@ -2308,7 +2500,7 @@ struct cs_info* get_current_cs(const std::string& es) {
nsresult rv;
nsAutoCString label(es.c_str());
nsAutoCString label(es);
nsAutoCString encoding;
if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
return ccs;
@ -2373,18 +2565,21 @@ struct cs_info* get_current_cs(const std::string& es) {
#endif
// primitive isalpha() replacement for tokenization
std::string get_casechars(const char* enc) {
char* get_casechars(const char* enc) {
struct cs_info* csconv = get_current_cs(enc);
std::string expw;
for (int i = 0; i <= 255; ++i) {
char expw[MAXLNLEN];
char* p = expw;
for (int i = 0; i <= 255; i++) {
if (cupper(csconv, i) != clower(csconv, i)) {
expw.push_back(static_cast<char>(i));
*p = static_cast<char>(i);
p++;
}
}
*p = '\0';
#ifdef MOZILLA_CLIENT
delete[] csconv;
#endif
return expw;
return mystrdup(expw);
}
// language to encoding default map
@ -2411,10 +2606,10 @@ static struct lang_map lang2enc[] =
{"tr_TR", LANG_tr}, // for back-compatibility
{"ru", LANG_ru}, {"uk", LANG_uk}};
int get_lang_num(const std::string& lang) {
int get_lang_num(const char* lang) {
int n = sizeof(lang2enc) / sizeof(lang2enc[0]);
for (int i = 0; i < n; i++) {
if (strcmp(lang.c_str(), lang2enc[i].lang) == 0) {
if (strcmp(lang, lang2enc[i].lang) == 0) {
return lang2enc[i].num;
}
}
@ -2423,21 +2618,26 @@ int get_lang_num(const std::string& lang) {
#ifndef OPENOFFICEORG
#ifndef MOZILLA_CLIENT
void initialize_utf_tbl() {
int initialize_utf_tbl() {
utf_tbl_count++;
if (utf_tbl)
return;
utf_tbl = new unicode_info2[CONTSIZE];
for (size_t j = 0; j < CONTSIZE; ++j) {
utf_tbl[j].cletter = 0;
utf_tbl[j].clower = (unsigned short)j;
utf_tbl[j].cupper = (unsigned short)j;
}
for (size_t j = 0; j < UTF_LST_LEN; ++j) {
utf_tbl[utf_lst[j].c].cletter = 1;
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
}
return 0;
utf_tbl = (unicode_info2*)malloc(CONTSIZE * sizeof(unicode_info2));
if (utf_tbl) {
size_t j;
for (j = 0; j < CONTSIZE; j++) {
utf_tbl[j].cletter = 0;
utf_tbl[j].clower = (unsigned short)j;
utf_tbl[j].cupper = (unsigned short)j;
}
for (j = 0; j < UTF_LST_LEN; j++) {
utf_tbl[utf_lst[j].c].cletter = 1;
utf_tbl[utf_lst[j].c].clower = utf_lst[j].clower;
utf_tbl[utf_lst[j].c].cupper = utf_lst[j].cupper;
}
} else
return 1;
return 0;
}
#endif
#endif
@ -2446,7 +2646,7 @@ void free_utf_tbl() {
if (utf_tbl_count > 0)
utf_tbl_count--;
if (utf_tbl && (utf_tbl_count == 0)) {
delete[] utf_tbl;
free(utf_tbl);
utf_tbl = NULL;
}
}
@ -2575,6 +2775,18 @@ size_t remove_ignored_chars_utf(std::string& word,
return w2.size();
}
namespace {
// Unary predicate: true for every character contained in the set given at
// construction.  Keeps its own copy of the set, so the source string may go
// away afterwards.
class is_any_of {
 public:
  // explicit: a std::string must not silently convert into a predicate
  explicit is_any_of(const std::string& in) : chars(in) {}

  // const, so the predicate is also usable through const objects/references
  bool operator()(char c) const { return chars.find(c) != std::string::npos; }

 private:
  std::string chars;
};
}
// strip all ignored characters in the string
size_t remove_ignored_chars(std::string& word,
const std::string& ignored_chars) {
@ -2584,48 +2796,54 @@ size_t remove_ignored_chars(std::string& word,
return word.size();
}
bool parse_string(const std::string& line, std::string& out, int ln) {
if (!out.empty()) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
return false;
}
int parse_string(char* line, char** out, int ln) {
char* tp = line;
char* piece;
int i = 0;
int np = 0;
std::string::const_iterator iter = line.begin();
std::string::const_iterator start_piece = mystrsep(line, iter);
while (start_piece != line.end()) {
switch (i) {
case 0: {
np++;
break;
if (*out) {
HUNSPELL_WARNING(stderr, "error: line %d: multiple definitions\n", ln);
return 1;
}
piece = mystrsep(&tp, 0);
while (piece) {
if (*piece != '\0') {
switch (i) {
case 0: {
np++;
break;
}
case 1: {
*out = mystrdup(piece);
if (!*out)
return 1;
np++;
break;
}
default:
break;
}
case 1: {
out.assign(start_piece, iter);
np++;
break;
}
default:
break;
i++;
}
++i;
start_piece = mystrsep(line, iter);
// free(piece);
piece = mystrsep(&tp, 0);
}
if (np != 2) {
HUNSPELL_WARNING(stderr, "error: line %d: missing data\n", ln);
return false;
return 1;
}
return true;
return 0;
}
bool parse_array(const std::string& line,
std::string& out,
bool parse_array(char* line,
char** out,
std::vector<w_char>& out_utf16,
int utf8,
int ln) {
if (!parse_string(line, out, ln))
if (parse_string(line, out, ln))
return false;
if (utf8) {
u8_u16(out_utf16, out);
u8_u16(out_utf16, *out);
std::sort(out_utf16.begin(), out_utf16.end());
}
return true;

View File

@ -71,14 +71,13 @@
* SUCH DAMAGE.
*/
#ifndef CSUTIL_HXX_
#define CSUTIL_HXX_
#ifndef __CSUTILHXX__
#define __CSUTILHXX__
#include "hunvisapi.h"
// First some base level utility routines
#include <fstream>
#include <string>
#include <vector>
#include <string.h>
@ -128,9 +127,8 @@
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
// fix long pathname problem of WIN32 by using w_char std::fstream::open override
LIBHUNSPELL_DLL_EXPORTED void myopen(std::ifstream& stream, const char* path,
std::ios_base::openmode mode);
// fopen or optional _wfopen to fix long pathname problem of WIN32
LIBHUNSPELL_DLL_EXPORTED FILE* myfopen(const char* path, const char* mode);
// convert UTF-16 characters to UTF-8
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
@ -141,16 +139,21 @@ LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
const std::string& src);
// remove end of line char(s)
LIBHUNSPELL_DLL_EXPORTED void mychomp(std::string& s);
LIBHUNSPELL_DLL_EXPORTED void mychomp(char* s);
// duplicate string
LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
// strcat for limited length destination string
LIBHUNSPELL_DLL_EXPORTED char* mystrcat(char* dest, const char* st, int max);
// parse into tokens with char delimiter
LIBHUNSPELL_DLL_EXPORTED std::string::const_iterator mystrsep(const std::string &str,
std::string::const_iterator& start);
LIBHUNSPELL_DLL_EXPORTED char* mystrsep(char** sptr, const char delim);
// replace pat by rep in word and return word
LIBHUNSPELL_DLL_EXPORTED char* mystrrep(char* word,
const char* pat,
const char* rep);
LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
const std::string& search,
const std::string& replace);
@ -160,13 +163,13 @@ LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
const std::string& apd);
// tokenize into lines with new line
LIBHUNSPELL_DLL_EXPORTED std::vector<std::string> line_tok(const std::string& text,
char breakchar);
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char* text,
char*** lines,
char breakchar);
// tokenize into lines with new line and uniq in place
LIBHUNSPELL_DLL_EXPORTED void line_uniq(std::string& text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED void line_uniq_app(std::string& text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char* line_uniq(char* text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char* line_uniq_app(char** text, char breakchar);
// reverse word
LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
@ -175,7 +178,10 @@ LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
// remove duplicates
LIBHUNSPELL_DLL_EXPORTED void uniqlist(std::vector<std::string>& list);
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char** list, int n);
// free character array list
LIBHUNSPELL_DLL_EXPORTED void freelist(char*** list, int n);
// character encoding information
struct cs_info {
@ -184,7 +190,7 @@ struct cs_info {
unsigned char cupper;
};
LIBHUNSPELL_DLL_EXPORTED void initialize_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
int langnum);
@ -194,13 +200,13 @@ LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
int langnum);
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const std::string& es);
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const char* es);
// get language identifiers of language codes
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const std::string& lang);
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char* lang);
// get characters of the given 8bit encoding with lower- and uppercase forms
LIBHUNSPELL_DLL_EXPORTED std::string get_casechars(const char* enc);
LIBHUNSPELL_DLL_EXPORTED char* get_casechars(const char* enc);
// convert std::string to all caps
LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
@ -250,12 +256,10 @@ LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
std::string& word,
const std::string& ignored_chars);
LIBHUNSPELL_DLL_EXPORTED bool parse_string(const std::string& line,
std::string& out,
int ln);
LIBHUNSPELL_DLL_EXPORTED int parse_string(char* line, char** out, int ln);
LIBHUNSPELL_DLL_EXPORTED bool parse_array(const std::string& line,
std::string& out,
LIBHUNSPELL_DLL_EXPORTED bool parse_array(char* line,
char** out,
std::vector<w_char>& out_utf16,
int utf8,
int ln);
@ -266,6 +270,10 @@ LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
const std::string& morph,
const std::string& var);
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char* s, const char* t);
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char* morph);
// conversion function for protected memory
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);

View File

@ -86,33 +86,33 @@ int FileMgr::fail(const char* err, const char* par) {
FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {
in[0] = '\0';
myopen(fin, file, std::ios_base::in);
if (!fin.is_open()) {
fin = myfopen(file, "r");
if (!fin) {
// check hzipped file
std::string st(file);
st.append(HZIP_EXTENSION);
hin = new Hunzip(st.c_str(), key);
}
if (!fin.is_open() && !hin->is_open())
if (!fin && !hin)
fail(MSG_OPEN, file);
}
FileMgr::~FileMgr() {
delete hin;
if (fin)
fclose(fin);
if (hin)
delete hin;
}
bool FileMgr::getline(std::string& dest) {
bool ret = false;
++linenum;
if (fin.is_open()) {
ret = static_cast<bool>(std::getline(fin, dest));
} else if (hin->is_open()) {
ret = hin->getline(dest);
}
if (!ret) {
--linenum;
}
return ret;
char* FileMgr::getline() {
const char* l;
linenum++;
if (fin)
return fgets(in, BUFSIZE - 1, fin);
if (hin && ((l = hin->getline()) != NULL))
return strcpy(in, l);
linenum--;
return NULL;
}
int FileMgr::getlinenum() {

View File

@ -72,21 +72,21 @@
*/
/* file manager class - read lines of files [filename] OR [filename.hz] */
#ifndef FILEMGR_HXX_
#define FILEMGR_HXX_
#ifndef _FILEMGR_HXX_
#define _FILEMGR_HXX_
#include "hunvisapi.h"
#include "hunzip.hxx"
#include <stdio.h>
#include <string>
#include <fstream>
class FileMgr {
class LIBHUNSPELL_DLL_EXPORTED FileMgr {
private:
FileMgr(const FileMgr&);
FileMgr& operator=(const FileMgr&);
protected:
std::ifstream fin;
FILE* fin;
Hunzip* hin;
char in[BUFSIZE + 50]; // input buffer
int fail(const char* err, const char* par);
@ -95,7 +95,7 @@ class FileMgr {
public:
FileMgr(const char* filename, const char* key = NULL);
~FileMgr();
bool getline(std::string&);
char* getline();
int getlinenum();
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -71,8 +71,10 @@
* SUCH DAMAGE.
*/
#ifndef HASHMGR_HXX_
#define HASHMGR_HXX_
#ifndef _HASHMGR_HXX_
#define _HASHMGR_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#include <string>
@ -84,7 +86,7 @@
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
class HashMgr {
class LIBHUNSPELL_DLL_EXPORTED HashMgr {
int tablesize;
struct hentry** tableptr;
flag flag_mode;
@ -92,10 +94,10 @@ class HashMgr {
int utf8;
unsigned short forbiddenword;
int langnum;
std::string enc;
std::string lang;
char* enc;
char* lang;
struct cs_info* csconv;
std::string ignorechars;
char* ignorechars;
std::vector<w_char> ignorechars_utf16;
int numaliasf; // flag vector `compression' with aliases
unsigned short** aliasf;
@ -112,35 +114,35 @@ class HashMgr {
struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
int add(const std::string& word);
int add_with_affix(const std::string& word, const std::string& pattern);
int remove(const std::string& word);
int decode_flags(unsigned short** result, const std::string& flags, FileMgr* af) const;
bool decode_flags(std::vector<unsigned short>& result, const std::string& flags, FileMgr* af) const;
unsigned short decode_flag(const char* flag) const;
char* encode_flag(unsigned short flag) const;
int is_aliasf() const;
int get_aliasf(int index, unsigned short** fvec, FileMgr* af) const;
int is_aliasm() const;
char* get_aliasm(int index) const;
int add_with_affix(const char* word, const char* pattern);
int remove(const char* word);
int decode_flags(unsigned short** result, char* flags, FileMgr* af);
unsigned short decode_flag(const char* flag);
char* encode_flag(unsigned short flag);
int is_aliasf();
int get_aliasf(int index, unsigned short** fvec, FileMgr* af);
int is_aliasm();
char* get_aliasm(int index);
private:
int get_clen_and_captype(const std::string& word, int* captype);
int load_tables(const char* tpath, const char* key);
int add_word(const std::string& word,
int add_word(const char* word,
int wbl,
int wcl,
unsigned short* ap,
int al,
const std::string* desc,
const char* desc,
bool onlyupcase);
int load_config(const char* affpath, const char* key);
bool parse_aliasf(const std::string& line, FileMgr* af);
int parse_aliasf(char* line, FileMgr* af);
int add_hidden_capitalized_word(const std::string& word,
int wcl,
unsigned short* flags,
int al,
const std::string* dp,
char* dp,
int captype);
bool parse_aliasm(const std::string& line, FileMgr* af);
int parse_aliasm(char* line, FileMgr* af);
int remove_forbidden_flag(const std::string& word);
};

View File

@ -38,8 +38,8 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef HTYPES_HXX_
#define HTYPES_HXX_
#ifndef _HTYPES_HXX_
#define _HTYPES_HXX_
#define ROTATE_LEN 5

File diff suppressed because it is too large Load Diff

View File

@ -38,8 +38,8 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef MYSPELLMGR_H_
#define MYSPELLMGR_H_
#ifndef _MYSPELLMGR_H_
#define _MYSPELLMGR_H_
#include "hunvisapi.h"
@ -68,7 +68,7 @@ LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
*/
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
LIBHUNSPELL_DLL_EXPORTED const char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word

View File

@ -70,29 +70,26 @@
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#ifndef MYSPELLMGR_HXX_
#define MYSPELLMGR_HXX_
#include "hunvisapi.h"
#include "w_char.hxx"
#include <string>
#include "hashmgr.hxx"
#include "affixmgr.hxx"
#include "suggestmgr.hxx"
#include "langnum.hxx"
#include <vector>
#define SPELL_XML "<?xml?>"
#define MAXDIC 20
#define MAXSUGGESTION 15
#define MAXSHARPS 5
#define MAXWORDLEN 176
#if __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)
# define H_DEPRECATED __attribute__((__deprecated__))
#elif defined(_MSC_VER) && (_MSC_VER >= 1300)
# define H_DEPRECATED __declspec(deprecated)
#else
# define H_DEPRECATED
#endif
#define HUNSPELL_OK (1 << 0)
#define HUNSPELL_OK_WARN (1 << 1)
class HunspellImpl;
#ifndef _MYSPELLMGR_HXX_
#define _MYSPELLMGR_HXX_
class LIBHUNSPELL_DLL_EXPORTED Hunspell {
private:
@ -100,7 +97,17 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
Hunspell& operator=(const Hunspell&);
private:
HunspellImpl* m_Impl;
AffixMgr* pAMgr;
HashMgr* pHMgr[MAXDIC];
int maxdic;
SuggestMgr* pSMgr;
char* affixpath;
char* encoding;
struct cs_info* csconv;
int langnum;
int utf8;
int complexprefixes;
char** wordbreak;
public:
/* Hunspell(aff, dic) - constructor of Hunspell class
@ -111,6 +118,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
* long path names (without the long path prefix Hunspell will use fopen()
* with system-dependent character encoding instead of _wfopen()).
*/
Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
~Hunspell();
@ -118,7 +126,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
int add_dic(const char* dpath, const char* key = NULL);
/* spell(word) - spellcheck word
* output: false = bad word, true = good word
* output: 0 = bad word, not 0 = good word
*
* plus output:
* info: information bit array, fields:
@ -126,8 +134,8 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
* SPELL_FORBIDDEN = an explicit forbidden word
* root: root (stem), when input is a word with affix(es)
*/
bool spell(const std::string& word, int* info = NULL, std::string* root = NULL);
H_DEPRECATED int spell(const char* word, int* info = NULL, char** root = NULL);
int spell(const char* word, int* info = NULL, char** root = NULL);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
@ -136,8 +144,8 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
* a newly allocated array of strings (*slts will be NULL when number
* of suggestion equals 0.)
*/
std::vector<std::string> suggest(const std::string& word);
H_DEPRECATED int suggest(char*** slst, const char* word);
int suggest(char*** slst, const char* word);
/* Suggest words from suffix rules
* suffix_suggest(suggestions, root_word)
@ -147,37 +155,36 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
* a newly allocated array of strings (*slts will be NULL when number
* of suggestion equals 0.)
*/
std::vector<std::string> suffix_suggest(const std::string& root_word);
H_DEPRECATED int suffix_suggest(char*** slst, const char* root_word);
int suffix_suggest(char*** slst, const char* root_word);
/* deallocate suggestion lists */
H_DEPRECATED void free_list(char*** slst, int n);
const std::string& get_dict_encoding() const;
H_DEPRECATED const char* get_dic_encoding() const;
void free_list(char*** slst, int n);
char* get_dic_encoding();
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
std::vector<std::string> analyze(const std::string& word);
H_DEPRECATED int analyze(char*** slst, const char* word);
/* stem(word) - stemmer function */
std::vector<std::string> stem(const std::string& word);
H_DEPRECATED int stem(char*** slst, const char* word);
int analyze(char*** slst, const char* word);
/* stem(analysis, n) - get stems from a morph. analysis
/* stem(result, word) - stemmer function */
int stem(char*** slst, const char* word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
* char ** result, result2;
* int n1 = analyze(&result, "words");
* int n2 = stem(&result2, result, n1);
*/
std::vector<std::string> stem(const std::vector<std::string>& morph);
H_DEPRECATED int stem(char*** slst, char** morph, int n);
int stem(char*** slst, char** morph, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
std::vector<std::string> generate(const std::string& word, const std::string& word2);
H_DEPRECATED int generate(char*** slst, const char* word, const char* word2);
int generate(char*** slst, const char* word, const char* word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
* example:
@ -186,38 +193,66 @@ class LIBHUNSPELL_DLL_EXPORTED Hunspell {
* int n = generate(&result, "word", &affix, 1);
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
*/
std::vector<std::string> generate(const std::string& word, const std::vector<std::string>& pl);
H_DEPRECATED int generate(char*** slst, const char* word, char** desc, int n);
int generate(char*** slst, const char* word, char** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
int add(const std::string& word);
int add(const char* word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
*/
int add_with_affix(const std::string& word, const std::string& example);
int add_with_affix(const char* word, const char* example);
/* remove word from the run-time dictionary */
int remove(const std::string& word);
int remove(const char* word);
/* other */
/* get extra word characters defined in affix file for tokenization */
const std::string& get_wordchars() const;
const std::vector<w_char>& get_wordchars_utf16() const;
const char* get_wordchars();
const std::vector<w_char>& get_wordchars_utf16();
const std::string& get_version() const;
struct cs_info* get_csconv();
const char* get_version();
int get_langnum() const;
/* need for putdic */
bool input_conv(const std::string& word, std::string& dest);
int input_conv(const char* word, char* dest, size_t destsize);
private:
void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev);
size_t cleanword2(std::string& dest,
std::vector<w_char>& dest_u,
const char*,
int* w_len,
int* pcaptype,
size_t* pabbrev);
void mkinitcap(std::string& u8);
int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
void mkallcap(std::string& u8);
int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
struct hentry* checkword(const char*, int* info, char** root);
std::string sharps_u8_l1(const std::string& source);
hentry*
spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root);
int is_keepcase(const hentry* rv);
int insert_sug(char*** slst, const char* word, int ns);
void cat_result(std::string& result, char* st);
char* stem_description(const char* desc);
int spellml(char*** slst, const char* word);
std::string get_xml_par(const char* par);
const char* get_xml_pos(const char* s, const char* attr);
int get_xml_list(char*** slst, const char* list, const char* tag);
int check_xml_par(const char* q, const char* attr, const char* value);
};
#endif

View File

@ -1,5 +1,5 @@
#ifndef HUNSPELL_VISIBILITY_H_
#define HUNSPELL_VISIBILITY_H_
#ifndef _HUNSPELL_VISIBILITY_H_
#define _HUNSPELL_VISIBILITY_H_
#if defined(HUNSPELL_STATIC)
# define LIBHUNSPELL_DLL_EXPORTED

View File

@ -59,7 +59,7 @@ int Hunzip::fail(const char* err, const char* par) {
}
Hunzip::Hunzip(const char* file, const char* key)
: bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0) {
: fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL) {
in[0] = out[0] = line[0] = '\0';
filename = mystrdup(file);
if (getcode(key) == -1)
@ -70,19 +70,19 @@ Hunzip::Hunzip(const char* file, const char* key)
int Hunzip::getcode(const char* key) {
unsigned char c[2];
int i, j, n;
int i, j, n, p;
int allocatedbit = BASEBITREC;
const char* enc = key;
if (!filename)
return -1;
myopen(fin, filename, std::ios_base::in | std::ios_base::binary);
if (!fin.is_open())
fin = myfopen(filename, "rb");
if (!fin)
return -1;
// read magic number
if (!fin.read(in, 3) ||
if ((fread(in, 1, 3, fin) < MAGICLEN) ||
!(strncmp(MAGIC, in, MAGICLEN) == 0 ||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
return fail(MSG_FORMAT, filename);
@ -93,7 +93,7 @@ int Hunzip::getcode(const char* key) {
unsigned char cs;
if (!key)
return fail(MSG_KEY, filename);
if (!fin.read(reinterpret_cast<char*>(c), 1))
if (fread(&c, 1, 1, fin) < 1)
return fail(MSG_FORMAT, filename);
for (cs = 0; *enc; enc++)
cs ^= *enc;
@ -104,7 +104,7 @@ int Hunzip::getcode(const char* key) {
key = NULL;
// read record count
if (!fin.read(reinterpret_cast<char*>(c), 2))
if (fread(&c, 1, 2, fin) < 2)
return fail(MSG_FORMAT, filename);
if (key) {
@ -115,14 +115,16 @@ int Hunzip::getcode(const char* key) {
}
n = ((int)c[0] << 8) + c[1];
dec.resize(BASEBITREC);
dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit));
if (!dec)
return fail(MSG_MEMORY, filename);
dec[0].v[0] = 0;
dec[0].v[1] = 0;
// read codes
for (i = 0; i < n; i++) {
unsigned char l;
if (!fin.read(reinterpret_cast<char*>(c), 2))
if (fread(c, 1, 2, fin) < 2)
return fail(MSG_FORMAT, filename);
if (key) {
if (*(++enc) == '\0')
@ -132,14 +134,14 @@ int Hunzip::getcode(const char* key) {
enc = key;
c[1] ^= *enc;
}
if (!fin.read(reinterpret_cast<char*>(&l), 1))
if (fread(&l, 1, 1, fin) < 1)
return fail(MSG_FORMAT, filename);
if (key) {
if (*(++enc) == '\0')
enc = key;
l ^= *enc;
}
if (!fin.read(in, l / 8 + 1))
if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1)
return fail(MSG_FORMAT, filename);
if (key)
for (j = 0; j <= l / 8; j++) {
@ -147,7 +149,7 @@ int Hunzip::getcode(const char* key) {
enc = key;
in[j] ^= *enc;
}
int p = 0;
p = 0;
for (j = 0; j < l; j++) {
int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
int oldp = p;
@ -156,7 +158,7 @@ int Hunzip::getcode(const char* key) {
lastbit++;
if (lastbit == allocatedbit) {
allocatedbit += BASEBITREC;
dec.resize(allocatedbit);
dec = (struct bit*)realloc(dec, allocatedbit * sizeof(struct bit));
}
dec[lastbit].v[0] = 0;
dec[lastbit].v[1] = 0;
@ -171,6 +173,10 @@ int Hunzip::getcode(const char* key) {
}
// Release the resources held by the decompressor: the code table (dec),
// the input stream (fin) and the file name copy (filename).
Hunzip::~Hunzip() {
  // free() is a no-op on a null pointer, so the heap blocks need no guard.
  free(dec);
  free(filename);
  // fclose() must not be handed a null stream, so this check has to stay.
  if (fin != NULL) {
    fclose(fin);
  }
}
@ -179,17 +185,16 @@ int Hunzip::getbuf() {
int p = 0;
int o = 0;
do {
if (inc == 0) {
fin.read(in, BUFSIZE);
inbits = fin.gcount() * 8;
}
if (inc == 0)
inbits = fread(in, 1, BUFSIZE, fin) * 8;
for (; inc < inbits; inc++) {
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
int oldp = p;
p = dec[p].v[b];
if (p == 0) {
if (oldp == lastbit) {
fin.close();
fclose(fin);
fin = NULL;
// add last odd byte
if (dec[lastbit].c[0])
out[o++] = dec[lastbit].c[1];
@ -207,11 +212,11 @@ int Hunzip::getbuf() {
return fail(MSG_FORMAT, filename);
}
bool Hunzip::getline(std::string& dest) {
const char* Hunzip::getline() {
char linebuf[BUFSIZE];
int l = 0, eol = 0, left = 0, right = 0;
if (bufsiz == -1)
return false;
return NULL;
while (l < bufsiz && !eol) {
linebuf[l++] = out[outc];
switch (out[outc]) {
@ -246,7 +251,7 @@ bool Hunzip::getline(std::string& dest) {
}
if (++outc == bufsiz) {
outc = 0;
bufsiz = fin.is_open() ? getbuf() : -1;
bufsiz = fin ? getbuf() : -1;
}
}
if (right)
@ -254,6 +259,5 @@ bool Hunzip::getline(std::string& dest) {
else
linebuf[l] = '\0';
strcpy(line + left, linebuf);
dest.assign(line);
return true;
return line;
}

View File

@ -41,14 +41,12 @@
/* hunzip: file decompression for sorted dictionaries with optional encryption,
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
#ifndef HUNZIP_HXX_
#define HUNZIP_HXX_
#ifndef _HUNZIP_HXX_
#define _HUNZIP_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#include <fstream>
#include <vector>
#define BUFSIZE 65536
#define HZIP_EXTENSION ".hz"
@ -70,9 +68,9 @@ class LIBHUNSPELL_DLL_EXPORTED Hunzip {
protected:
char* filename;
std::ifstream fin;
FILE* fin;
int bufsiz, lastbit, inc, inbits, outc;
std::vector<bit> dec; // code table
struct bit* dec; // code table
char in[BUFSIZE]; // input buffer
char out[BUFSIZE + 1]; // Huffman-decoded buffer
char line[BUFSIZE + 50]; // decoded line
@ -83,8 +81,7 @@ class LIBHUNSPELL_DLL_EXPORTED Hunzip {
public:
Hunzip(const char* filename, const char* key = NULL);
~Hunzip();
bool is_open() { return fin.is_open(); }
bool getline(std::string& dest);
const char* getline();
};
#endif

View File

@ -38,12 +38,12 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef LANGNUM_HXX_
#define LANGNUM_HXX_
#ifndef _LANGNUM_HXX_
#define _LANGNUM_HXX_
/*
language numbers for language specific codes
see https://wiki.openoffice.org/w/index.php?title=Languages&oldid=230199
see http://l10n.openoffice.org/languages.html
*/
enum {

View File

@ -36,13 +36,15 @@
#include "phonet.hxx"
void init_phonet_hash(phonetable& parms) {
for (int i = 0; i < HASHSIZE; i++) {
int i, k;
for (i = 0; i < HASHSIZE; i++) {
parms.hash[i] = -1;
}
for (int i = 0; parms.rules[i][0] != '\0'; i += 2) {
for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
/** set hash value **/
int k = (unsigned char)parms.rules[i][0];
k = (unsigned char)parms.rules[i][0];
if (parms.hash[k] < 0) {
parms.hash[k] = i;
@ -71,8 +73,9 @@ static int myisalpha(char ch) {
std::string phonet(const std::string& inword, phonetable& parms) {
int i, k = 0, p, z;
int k0, n0, p0 = -333;
int k0, n0, p0 = -333, z0;
char c;
const char* s;
typedef unsigned char uchar;
size_t len = inword.size();
@ -87,15 +90,15 @@ std::string phonet(const std::string& inword, phonetable& parms) {
i = z = 0;
while ((c = word[i]) != '\0') {
int n = parms.hash[(uchar)c];
int z0 = 0;
z0 = 0;
if (n >= 0 && !parms.rules[n].empty()) {
if (n >= 0) {
/** check all rules for the same letter **/
while (parms.rules[n][0] == c) {
/** check whole string **/
k = 1; /** number of found letters **/
p = 5; /** default priority **/
const char*s = parms.rules[n].c_str();
s = parms.rules[n];
s++; /** important for (see below) "*(s-1)" **/
while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
@ -139,13 +142,13 @@ std::string phonet(const std::string& inword, phonetable& parms) {
n0 = parms.hash[(uchar)c0];
// if (parms.followup && k > 1 && n0 >= 0
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0' && !parms.rules[n0].empty()) {
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') {
/** test follow-up rule for "word[i+k]" **/
while (parms.rules[n0][0] == c0) {
/** check whole string **/
k0 = k;
p0 = 5;
s = parms.rules[n0].c_str();
s = parms.rules[n0];
s++;
while (*s != '\0' && word[i + k0] == *s &&
!isdigit((unsigned char)*s) &&
@ -203,9 +206,9 @@ std::string phonet(const std::string& inword, phonetable& parms) {
} /** end of follow-up stuff **/
/** replace string **/
s = parms.rules[n + 1].c_str();
p0 = (!parms.rules[n].empty() &&
strchr(parms.rules[n].c_str() + 1, '<') != NULL)
s = parms.rules[n + 1];
p0 = (parms.rules[n][0] != '\0' &&
strchr(parms.rules[n] + 1, '<') != NULL)
? 1
: 0;
if (p0 == 1 && z == 0) {
@ -238,8 +241,8 @@ std::string phonet(const std::string& inword, phonetable& parms) {
}
/** new "actual letter" **/
c = *s;
if (!parms.rules[n].empty() &&
strstr(parms.rules[n].c_str() + 1, "^^") != NULL) {
if (parms.rules[n][0] != '\0' &&
strstr(parms.rules[n] + 1, "^^") != NULL) {
if (c != '\0') {
target.push_back(c);
}

View File

@ -27,8 +27,8 @@
Porting from Aspell to Hunspell using C-like structs
*/
#ifndef PHONET_HXX_
#define PHONET_HXX_
#ifndef __PHONETHXX__
#define __PHONETHXX__
#define HASHSIZE 256
#define MAXPHONETLEN 256
@ -38,7 +38,9 @@
struct phonetable {
char utf8;
std::vector<std::string> rules;
cs_info* lang;
int num;
char** rules;
int hash[HASHSIZE];
};

View File

@ -90,122 +90,104 @@ RepList::RepList(int n) {
RepList::~RepList() {
for (int i = 0; i < pos; i++) {
delete dat[i];
free(dat[i]->pattern);
free(dat[i]->pattern2);
free(dat[i]);
}
free(dat);
}
/* Return the number of entries currently stored in the list (pos is the
 * next free slot in dat and is advanced by every successful add()). */
int RepList::get_pos() {
return pos;
}
/* Return the n-th entry of the list. n is used as a raw index into dat;
 * no bounds check is performed here. */
replentry* RepList::item(int n) {
return dat[n];
}
int RepList::find(const char* word) {
int RepList::near(const char* word) {
int p1 = 0;
int p2 = pos - 1;
while (p1 <= p2) {
int p2 = pos;
while ((p2 - p1) > 1) {
int m = (p1 + p2) / 2;
int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size());
if (c < 0)
p2 = m - 1;
else if (c > 0)
p1 = m + 1;
else { // scan back for a longer match
for (p1 = m - 1; p1 >= 0; --p1)
if (!strncmp(word, dat[p1]->pattern.c_str(), dat[p1]->pattern.size()))
m = p1;
else if (dat[p1]->pattern.size() < dat[m]->pattern.size())
break;
return m;
}
int c = strcmp(word, dat[m]->pattern);
if (c <= 0) {
if (c < 0)
p2 = m;
else
p1 = p2 = m;
} else
p1 = m;
}
return -1;
return p1;
}
std::string RepList::replace(const char* word, int ind, bool atstart) {
int type = atstart ? 1 : 0;
if (ind < 0)
return std::string();
if (strlen(word) == dat[ind]->pattern.size())
type = atstart ? 3 : 2;
while (type && dat[ind]->outstrings[type].empty())
type = (type == 2 && !atstart) ? 0 : type - 1;
return dat[ind]->outstrings[type];
}
int RepList::add(const std::string& in_pat1, const std::string& pat2) {
if (pos >= size || in_pat1.empty() || pat2.empty()) {
return 1;
}
// analyse word context
int type = 0;
std::string pat1(in_pat1);
if (pat1[0] == '_') {
pat1.erase(0, 1);
type = 1;
}
if (!pat1.empty() && pat1[pat1.size() - 1] == '_') {
type = type + 2;
pat1.erase(pat1.size() - 1);
}
mystrrep(pat1, "_", " ");
// find existing entry
int m = find(pat1.c_str());
if (m >= 0 && dat[m]->pattern == pat1) {
// since already used
dat[m]->outstrings[type] = pat2;
mystrrep(dat[m]->outstrings[type], "_", " ");
return 0;
}
// make a new entry if none exists
replentry* r = new replentry;
if (r == NULL)
return 1;
r->pattern = pat1;
r->outstrings[type] = pat2;
mystrrep(r->outstrings[type], "_", " ");
dat[pos++] = r;
// sort to the right place in the list
int i;
for (i = pos - 1; i > 0; i--) {
int c = strncmp(r->pattern.c_str(), dat[i-1]->pattern.c_str(), dat[i-1]->pattern.size());
if (c > 0)
break;
else if (c == 0) { // subpatterns match. Patterns can't be identical since would catch earlier
for (int j = i - 2; j > 0 && !strncmp(dat[i-1]->pattern.c_str(), dat[j]->pattern.c_str(), dat[i-1]->pattern.size()); --j)
if (dat[j]->pattern.size() > r->pattern.size() ||
(dat[j]->pattern.size() == r->pattern.size() && strncmp(dat[j]->pattern.c_str(), r->pattern.c_str(), r->pattern.size()) > 0)) {
i = j;
break;
}
break;
}
}
memmove(dat + i + 1, dat + i, (pos - i - 1) * sizeof(replentry *));
dat[i] = r;
int RepList::match(const char* word, int n) {
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0)
return strlen(dat[n]->pattern);
return 0;
}
bool RepList::conv(const std::string& in_word, std::string& dest) {
int RepList::add(char* pat1, char* pat2) {
if (pos >= size || pat1 == NULL || pat2 == NULL)
return 1;
replentry* r = (replentry*)malloc(sizeof(replentry));
if (r == NULL)
return 1;
r->pattern = mystrrep(pat1, "_", " ");
r->pattern2 = mystrrep(pat2, "_", " ");
r->start = false;
r->end = false;
dat[pos++] = r;
for (int i = pos - 1; i > 0; i--) {
r = dat[i];
if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
dat[i] = dat[i - 1];
dat[i - 1] = r;
} else
break;
}
return 0;
}
// Apply the replacement list to word and write the result to dest.
//
// At every position of word the list is searched (near + match); when an
// entry's pattern matches there, its replacement (pattern2) is emitted and
// the matched characters are skipped, otherwise the character is copied
// unchanged.
//
// word     - NUL-terminated input; assumed not to overlap dest
// dest     - output buffer, NUL-terminated on success
// destsize - capacity of dest in bytes, including the terminator
//
// Returns 1 if at least one replacement was made, 0 if the word was copied
// unchanged, and -1 if the result does not fit into dest (dest is then
// left unterminated and must not be used).
int RepList::conv(const char* word, char* dest, size_t destsize) {
  // Without room for the terminator nothing can be stored; this also keeps
  // the final write below in bounds for an empty input word.
  if (destsize == 0) {
    return -1;
  }

  // The input is never modified, so measure it once instead of on every
  // loop iteration.
  const size_t wordlen = strlen(word);
  size_t stl = 0;  // number of bytes written to dest so far
  int change = 0;

  for (size_t i = 0; i < wordlen; i++) {
    const int n = near(word + i);
    const int l = match(word + i, n);
    if (l) {
      const char* repl = dat[n]->pattern2;
      const size_t replen = strlen(repl);
      if (stl + replen >= destsize) {
        return -1;
      }
      strcpy(dest + stl, repl);
      stl += replen;
      i += l - 1;  // skip the rest of the matched pattern
      change = 1;
    } else {
      if (stl + 1 >= destsize) {
        return -1;
      }
      dest[stl++] = word[i];
    }
  }
  dest[stl] = '\0';
  return change;
}
bool RepList::conv(const char* word, std::string& dest) {
dest.clear();
size_t wordlen = in_word.size();
const char* word = in_word.c_str();
bool change = false;
for (size_t i = 0; i < wordlen; ++i) {
int n = find(word + i);
std::string l = replace(word + i, n, i == 0);
if (!l.empty()) {
dest.append(l);
i += dat[n]->pattern.size() - 1;
for (size_t i = 0; i < strlen(word); i++) {
int n = near(word + i);
int l = match(word + i, n);
if (l) {
dest.append(dat[n]->pattern2);
i += l - 1;
change = true;
} else {
dest.push_back(word[i]);
}
}
return change;
}

View File

@ -72,15 +72,17 @@
*/
/* string replacement list class */
#ifndef REPLIST_HXX_
#define REPLIST_HXX_
#ifndef _REPLIST_HXX_
#define _REPLIST_HXX_
#include "hunvisapi.h"
#include "w_char.hxx"
#include <string>
#include <vector>
class RepList {
class LIBHUNSPELL_DLL_EXPORTED RepList {
private:
RepList(const RepList&);
RepList& operator=(const RepList&);
@ -91,13 +93,15 @@ class RepList {
int pos;
public:
explicit RepList(int n);
RepList(int n);
~RepList();
int add(const std::string& pat1, const std::string& pat2);
int get_pos();
int add(char* pat1, char* pat2);
replentry* item(int n);
int find(const char* word);
std::string replace(const char* word, int n, bool atstart);
bool conv(const std::string& word, std::string& dest);
int near(const char* word);
int match(const char* word, int n);
int conv(const char* word, char* dest, size_t destsize);
bool conv(const char* word, std::string& dest);
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -71,8 +71,8 @@
* SUCH DAMAGE.
*/
#ifndef SUGGESTMGR_HXX_
#define SUGGESTMGR_HXX_
#ifndef _SUGGESTMGR_HXX_
#define _SUGGESTMGR_HXX_
#define MAX_ROOTS 100
#define MAX_WORDS 100
@ -91,6 +91,8 @@
#define NGRAM_LOWERING (1 << 2)
#define NGRAM_WEIGHTED (1 << 3)
#include "hunvisapi.h"
#include "atypes.hxx"
#include "affixmgr.hxx"
#include "hashmgr.hxx"
@ -99,22 +101,22 @@
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
class SuggestMgr {
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {
private:
SuggestMgr(const SuggestMgr&);
SuggestMgr& operator=(const SuggestMgr&);
private:
char* ckey;
size_t ckeyl;
std::vector<w_char> ckey_utf;
int ckeyl;
w_char* ckey_utf;
char* ctry;
size_t ctryl;
std::vector<w_char> ctry_utf;
int ctryl;
w_char* ctry_utf;
AffixMgr* pAMgr;
unsigned int maxSug;
int maxSug;
struct cs_info* csconv;
int utf8;
int langnum;
@ -124,53 +126,62 @@ class SuggestMgr {
int complexprefixes;
public:
SuggestMgr(const char* tryme, unsigned int maxn, AffixMgr* aptr);
SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr);
~SuggestMgr();
void suggest(std::vector<std::string>& slst, const char* word, int* onlycmpdsug);
void ngsuggest(std::vector<std::string>& slst, const char* word, const std::vector<HashMgr*>& rHMgr);
int suggest(char*** slst, const char* word, int nsug, int* onlycmpdsug);
int ngsuggest(char** wlst, const char* word, int ns, HashMgr** pHMgr, int md);
int suggest_auto(char*** slst, const char* word, int nsug);
int suggest_stems(char*** slst, const char* word, int nsug);
int suggest_pos_stems(char*** slst, const char* word, int nsug);
std::string suggest_morph(const std::string& word);
std::string suggest_gen(const std::vector<std::string>& pl, const std::string& pattern);
char* suggest_morph(const char* word);
char* suggest_gen(char** pl, int pln, const char* pattern);
char* suggest_morph_for_spelling_error(const char* word);
private:
void testsug(std::vector<std::string>& wlst,
const std::string& candidate,
int cpdsuggest,
int* timer,
clock_t* timelimit);
int checkword(const std::string& word, int, int*, clock_t*);
int testsug(char** wlst,
const char* candidate,
int wl,
int ns,
int cpdsuggest,
int* timer,
clock_t* timelimit);
int checkword(const char*, int, int, int*, clock_t*);
int check_forbidden(const char*, int);
void capchars(std::vector<std::string>&, const char*, int);
int replchars(std::vector<std::string>&, const char*, int);
int doubletwochars(std::vector<std::string>&, const char*, int);
int forgotchar(std::vector<std::string>&, const char*, int);
int swapchar(std::vector<std::string>&, const char*, int);
int longswapchar(std::vector<std::string>&, const char*, int);
int movechar(std::vector<std::string>&, const char*, int);
int extrachar(std::vector<std::string>&, const char*, int);
int badcharkey(std::vector<std::string>&, const char*, int);
int badchar(std::vector<std::string>&, const char*, int);
int twowords(std::vector<std::string>&, const char*, int);
int capchars(char**, const char*, int, int);
int replchars(char**, const char*, int, int);
int doubletwochars(char**, const char*, int, int);
int forgotchar(char**, const char*, int, int);
int swapchar(char**, const char*, int, int);
int longswapchar(char**, const char*, int, int);
int movechar(char**, const char*, int, int);
int extrachar(char**, const char*, int, int);
int badcharkey(char**, const char*, int, int);
int badchar(char**, const char*, int, int);
int twowords(char**, const char*, int, int);
int fixstems(char**, const char*, int);
void capchars_utf(std::vector<std::string>&, const w_char*, int wl, int);
int doubletwochars_utf(std::vector<std::string>&, const w_char*, int wl, int);
int forgotchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
int extrachar_utf(std::vector<std::string>&, const w_char*, int wl, int);
int badcharkey_utf(std::vector<std::string>&, const w_char*, int wl, int);
int badchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
int swapchar_utf(std::vector<std::string>&, const w_char*, int wl, int);
int longswapchar_utf(std::vector<std::string>&, const w_char*, int, int);
int movechar_utf(std::vector<std::string>&, const w_char*, int, int);
int capchars_utf(char**, const w_char*, int wl, int, int);
int doubletwochars_utf(char**, const w_char*, int wl, int, int);
int forgotchar_utf(char**, const w_char*, int wl, int, int);
int extrachar_utf(char**, const w_char*, int wl, int, int);
int badcharkey_utf(char**, const w_char*, int wl, int, int);
int badchar_utf(char**, const w_char*, int wl, int, int);
int swapchar_utf(char**, const w_char*, int wl, int, int);
int longswapchar_utf(char**, const w_char*, int, int, int);
int movechar_utf(char**, const w_char*, int, int, int);
int mapchars(std::vector<std::string>&, const char*, int);
int mapchars(char**, const char*, int, int);
int map_related(const char*,
std::string&,
int,
std::vector<std::string>& wlst,
char** wlst,
int,
int,
const mapentry*,
int,
const std::vector<mapentry>&,
int*,
clock_t*);
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
@ -181,7 +192,7 @@ class SuggestMgr {
void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
int lcslen(const char* s, const char* s2);
int lcslen(const std::string& s, const std::string& s2);
std::string suggest_hentry_gen(hentry* rv, const char* pattern);
char* suggest_hentry_gen(hentry* rv, const char* pattern);
};
#endif

View File

@ -38,10 +38,8 @@
*
* ***** END LICENSE BLOCK ***** */
#ifndef W_CHAR_HXX_
#define W_CHAR_HXX_
#include <string>
#ifndef __WCHARHXX__
#define __WCHARHXX__
#ifndef GCC
struct w_char {
@ -68,8 +66,10 @@ struct __attribute__((packed)) w_char {
// two character arrays
struct replentry {
std::string pattern;
std::string outstrings[4]; // med, ini, fin, isol
char* pattern;
char* pattern2;
bool start;
bool end;
};
#endif

View File

@ -5,7 +5,7 @@
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
include('/ipc/chromium/chromium-config.mozbuild')
UNIFIED_SOURCES += [
SOURCES += [
'mozEnglishWordUtils.cpp',
'mozInlineSpellChecker.cpp',
'mozInlineSpellWordUtil.cpp',