diff --git a/extensions/spellcheck/hunspell/src/README.mozilla b/extensions/spellcheck/hunspell/src/README.mozilla index 9a6c8f2e3de4..7e87b21f8be7 100644 --- a/extensions/spellcheck/hunspell/src/README.mozilla +++ b/extensions/spellcheck/hunspell/src/README.mozilla @@ -1,2 +1,2 @@ -Hunspell Version: 1.6.0 +Hunspell Version: 1.6.1 Additional Patches: See patches directory. diff --git a/extensions/spellcheck/hunspell/src/affentry.cxx b/extensions/spellcheck/hunspell/src/affentry.cxx index 70b468c0a45f..4ef0c00d9b48 100644 --- a/extensions/spellcheck/hunspell/src/affentry.cxx +++ b/extensions/spellcheck/hunspell/src/affentry.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/affentry.hxx b/extensions/spellcheck/hunspell/src/affentry.hxx index 09240300d625..4bafc043f4a2 100644 --- a/extensions/spellcheck/hunspell/src/affentry.hxx +++ b/extensions/spellcheck/hunspell/src/affentry.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/affixmgr.cxx b/extensions/spellcheck/hunspell/src/affixmgr.cxx index 658a8aa091f1..ffce7bb1bdd5 100644 --- a/extensions/spellcheck/hunspell/src/affixmgr.cxx +++ b/extensions/spellcheck/hunspell/src/affixmgr.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -1290,8 +1287,8 @@ int AffixMgr::cpdrep_check(const char* word, int wl) { // search every occurence of the pattern in the word while ((r = strstr(r, reptable[i].pattern.c_str())) != NULL) { std::string candidate(word); - size_t type = r == word ? 1 : 0; - if (r - word + reptable[i].pattern.size() == lenp) + size_t type = r == word && langnum != LANG_hu ? 1 : 0; + if (r - word + reptable[i].pattern.size() == lenp && langnum != LANG_hu) type += 2; candidate.replace(r - word, lenp, reptable[i].outstrings[type]); if (candidate_check(candidate.c_str(), candidate.size())) @@ -1494,9 +1491,8 @@ int AffixMgr::defcpd_check(hentry*** words, } inline int AffixMgr::candidate_check(const char* word, int len) { - struct hentry* rv = NULL; - rv = lookup(word); + struct hentry* rv = lookup(word); if (rv) return 1; @@ -1817,7 +1813,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, // LANG_hu section: spec. Hungarian rule if (langnum == LANG_hu) { // calculate syllable number of the word - numsyllable += get_syllable(st.substr(i)); + numsyllable += get_syllable(st.substr(0, i)); // + 1 word, if syllable number of the prefix > 1 (hungarian // convention) if (pfx && (get_syllable(pfx->getKey()) > 1)) @@ -1902,7 +1898,7 @@ struct hentry* AffixMgr::compound_check(const std::string& word, (compoundend && TESTAFF(rv->astr, compoundend, rv->alen))) && (((cpdwordmax == -1) || (wordnum + 1 < cpdwordmax)) || ((cpdmaxsyllable != 0) && - (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->clen)) <= + (numsyllable + get_syllable(std::string(HENTRY_WORD(rv), rv->blen)) <= cpdmaxsyllable))) && ( // test CHECKCOMPOUNDPATTERN @@ -2383,7 +2379,7 @@ int AffixMgr::compound_check_morph(const char* word, // LANG_hu section: spec. Hungarian rule if (langnum == LANG_hu) { // calculate syllable number of the word - numsyllable += get_syllable(st.substr(i)); + numsyllable += get_syllable(st.substr(0, i)); // + 1 word, if syllable number of the prefix > 1 (hungarian // convention) @@ -3045,10 +3041,9 @@ struct hentry* AffixMgr::affix_check(const char* word, int len, const FLAG needflag, char in_compound) { - struct hentry* rv = NULL; // check all prefixes (also crossed with suffixes if allowed) - rv = prefix_check(word, len, in_compound, needflag); + struct hentry* rv = prefix_check(word, len, in_compound, needflag); if (rv) return rv; @@ -3292,7 +3287,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst, wlst[nh].word = mystrdup(ts); if (!wlst[nh].word) return 0; - wlst[nh].allow = (1 == 0); + wlst[nh].allow = false; wlst[nh].orig = NULL; nh++; // add special phonetic version @@ -3300,7 +3295,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst, wlst[nh].word = mystrdup(phon); if (!wlst[nh].word) return nh - 1; - wlst[nh].allow = (1 == 0); + wlst[nh].allow = false; wlst[nh].orig = mystrdup(ts); if (!wlst[nh].orig) return nh - 1; @@ -3341,7 +3336,7 @@ int AffixMgr::expand_rootword(struct guessword* wlst, wlst[nh].word = mystrdup(prefix.c_str()); if (!wlst[nh].word) return nh - 1; - wlst[nh].allow = (1 == 0); + wlst[nh].allow = false; wlst[nh].orig = mystrdup(newword.c_str()); if (!wlst[nh].orig) return nh - 1; diff --git a/extensions/spellcheck/hunspell/src/affixmgr.hxx b/extensions/spellcheck/hunspell/src/affixmgr.hxx index 83a4b42c3332..d41e69cfd290 100644 --- a/extensions/spellcheck/hunspell/src/affixmgr.hxx +++ b/extensions/spellcheck/hunspell/src/affixmgr.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/atypes.hxx b/extensions/spellcheck/hunspell/src/atypes.hxx index fe495f78ed14..f84152318922 100644 --- a/extensions/spellcheck/hunspell/src/atypes.hxx +++ b/extensions/spellcheck/hunspell/src/atypes.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/baseaffix.hxx b/extensions/spellcheck/hunspell/src/baseaffix.hxx index 579c030bcfe5..9191dba475fc 100644 --- a/extensions/spellcheck/hunspell/src/baseaffix.hxx +++ b/extensions/spellcheck/hunspell/src/baseaffix.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx index ac5cd986f2fe..df97b577aaf8 100644 --- a/extensions/spellcheck/hunspell/src/csutil.cxx +++ b/extensions/spellcheck/hunspell/src/csutil.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -518,18 +515,20 @@ unsigned char ccase(const struct cs_info* csconv, int nIndex) { w_char upper_utf(w_char u, int langnum) { unsigned short idx = (u.h << 8) + u.l; - if (idx != unicodetoupper(idx, langnum)) { - u.h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u.l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u.h = (unsigned char)(upridx >> 8); + u.l = (unsigned char)(upridx & 0x00FF); } return u; } w_char lower_utf(w_char u, int langnum) { unsigned short idx = (u.h << 8) + u.l; - if (idx != unicodetolower(idx, langnum)) { - u.h = (unsigned char)(unicodetolower(idx, langnum) >> 8); - u.l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u.h = (unsigned char)(lwridx >> 8); + u.l = (unsigned char)(lwridx & 0x00FF); } return u; } @@ -551,12 +550,13 @@ std::string& mkallsmall(std::string& s, const struct cs_info* csconv) { } std::vector& mkallsmall_utf(std::vector& u, - int langnum) { + int langnum) { for (size_t i = 0; i < u.size(); ++i) { unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetolower(idx, langnum)) { - u[i].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u[i].h = (unsigned char)(lwridx >> 8); + u[i].l = (unsigned char)(lwridx & 0x00FF); } } return u; @@ -565,9 +565,10 @@ std::vector& mkallsmall_utf(std::vector& u, std::vector& mkallcap_utf(std::vector& u, int langnum) { for (size_t i = 0; i < u.size(); i++) { unsigned short idx = (u[i].h << 8) + u[i].l; - if (idx != unicodetoupper(idx, langnum)) { - u[i].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u[i].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u[i].h = (unsigned char)(upridx >> 8); + u[i].l = (unsigned char)(upridx & 0x00FF); } } return u; @@ -583,9 +584,10 @@ std::string& mkinitcap(std::string& s, const struct cs_info* csconv) { std::vector& mkinitcap_utf(std::vector& u, int langnum) { if (!u.empty()) { unsigned short idx = (u[0].h << 8) + u[0].l; - if (idx != unicodetoupper(idx, langnum)) { - u[0].h = (unsigned char)(unicodetoupper(idx, langnum) >> 8); - u[0].l = (unsigned char)(unicodetoupper(idx, langnum) & 0x00FF); + unsigned short upridx = unicodetoupper(idx, langnum); + if (idx != upridx) { + u[0].h = (unsigned char)(upridx >> 8); + u[0].l = (unsigned char)(upridx & 0x00FF); } } return u; @@ -601,9 +603,10 @@ std::string& mkinitsmall(std::string& s, const struct cs_info* csconv) { std::vector& mkinitsmall_utf(std::vector& u, int langnum) { if (!u.empty()) { unsigned short idx = (u[0].h << 8) + u[0].l; - if (idx != unicodetolower(idx, langnum)) { - u[0].h = (unsigned char)(unicodetolower(idx, langnum) >> 8); - u[0].l = (unsigned char)(unicodetolower(idx, langnum) & 0x00FF); + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) { + u[0].h = (unsigned char)(lwridx >> 8); + u[0].l = (unsigned char)(lwridx & 0x00FF); } } return u; @@ -2531,12 +2534,17 @@ int get_captype_utf8(const std::vector& word, int langnum) { size_t ncap = 0; size_t nneutral = 0; size_t firstcap = 0; - for (size_t i = 0; i < word.size(); ++i) { - unsigned short idx = (word[i].h << 8) + word[i].l; - if (idx != unicodetolower(idx, langnum)) + + std::vector::const_iterator it = word.begin(); + std::vector::const_iterator it_end = word.end(); + while (it != it_end) { + unsigned short idx = (it->h << 8) + it->l; + unsigned short lwridx = unicodetolower(idx, langnum); + if (idx != lwridx) ncap++; - if (unicodetoupper(idx, langnum) == unicodetolower(idx, langnum)) + if (unicodetoupper(idx, langnum) == lwridx) nneutral++; + ++it; } if (ncap) { unsigned short idx = (word[0].h << 8) + word[0].l; diff --git a/extensions/spellcheck/hunspell/src/csutil.hxx b/extensions/spellcheck/hunspell/src/csutil.hxx index 302d7e909561..5d83f80970c2 100644 --- a/extensions/spellcheck/hunspell/src/csutil.hxx +++ b/extensions/spellcheck/hunspell/src/csutil.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/filemgr.cxx b/extensions/spellcheck/hunspell/src/filemgr.cxx index b7c89b2bea0c..4a14de87620f 100644 --- a/extensions/spellcheck/hunspell/src/filemgr.cxx +++ b/extensions/spellcheck/hunspell/src/filemgr.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/filemgr.hxx b/extensions/spellcheck/hunspell/src/filemgr.hxx index 991e924a2524..62433aeefe80 100644 --- a/extensions/spellcheck/hunspell/src/filemgr.hxx +++ b/extensions/spellcheck/hunspell/src/filemgr.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx index 1de16901e8d6..23421b567afb 100644 --- a/extensions/spellcheck/hunspell/src/hashmgr.cxx +++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -363,12 +360,11 @@ int HashMgr::add_hidden_capitalized_word(const std::string& word, } // detect captype and modify word length for UTF-8 encoding -int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { +int HashMgr::get_clen_and_captype(const std::string& word, int* captype, std::vector &workbuf) { int len; if (utf8) { - std::vector dest_utf; - len = u8_u16(dest_utf, word); - *captype = get_captype_utf8(dest_utf, langnum); + len = u8_u16(workbuf, word); + *captype = get_captype_utf8(workbuf, langnum); } else { len = word.size(); *captype = get_captype(word, csconv); @@ -376,6 +372,11 @@ int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { return len; } +int HashMgr::get_clen_and_captype(const std::string& word, int* captype) { + std::vector workbuf; + return get_clen_and_captype(word, captype, workbuf); +} + // remove word (personal dictionary function for standalone applications) int HashMgr::remove(const std::string& word) { struct hentry* dp = lookup(word.c_str()); @@ -527,6 +528,8 @@ int HashMgr::load_tables(const char* tpath, const char* key) { // loop through all words on much list and add to hash // table and create word and affix strings + std::vector workbuf; + while (dict->getline(ts)) { mychomp(ts); // split each line into word and morphological description @@ -599,7 +602,7 @@ int HashMgr::load_tables(const char* tpath, const char* key) { } int captype; - int wcl = get_clen_and_captype(ts, &captype); + int wcl = get_clen_and_captype(ts, &captype, workbuf); const std::string *dp_str = dp.empty() ? NULL : &dp; // add the word and its index plus its capitalized form optionally if (add_word(ts, wcl, flags, al, dp_str, false) || diff --git a/extensions/spellcheck/hunspell/src/hashmgr.hxx b/extensions/spellcheck/hunspell/src/hashmgr.hxx index 812171af2b5e..da485d7afa19 100644 --- a/extensions/spellcheck/hunspell/src/hashmgr.hxx +++ b/extensions/spellcheck/hunspell/src/hashmgr.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -125,6 +122,7 @@ class HashMgr { private: int get_clen_and_captype(const std::string& word, int* captype); + int get_clen_and_captype(const std::string& word, int* captype, std::vector &workbuf); int load_tables(const char* tpath, const char* key); int add_word(const std::string& word, int wcl, diff --git a/extensions/spellcheck/hunspell/src/htypes.hxx b/extensions/spellcheck/hunspell/src/htypes.hxx index 1e6c11839099..8f66a0080e42 100644 --- a/extensions/spellcheck/hunspell/src/htypes.hxx +++ b/extensions/spellcheck/hunspell/src/htypes.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx index abcdb8f3e07f..1ef11df34127 100644 --- a/extensions/spellcheck/hunspell/src/hunspell.cxx +++ b/extensions/spellcheck/hunspell/src/hunspell.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -241,8 +238,8 @@ size_t HunspellImpl::cleanword2(std::string& dest, const char* q = src.c_str(); // first skip over any leading blanks - while ((*q != '\0') && (*q == ' ')) - q++; + while (*q == ' ') + ++q; // now strip off any trailing periods (recording their presence) *pabbrev = 0; @@ -278,8 +275,8 @@ void HunspellImpl::cleanword(std::string& dest, int firstcap = 0; // first skip over any leading blanks - while ((*q != '\0') && (*q == ' ')) - q++; + while (*q == ' ') + ++q; // now strip off any trailing periods (recording their presence) *pabbrev = 0; @@ -1664,7 +1661,7 @@ std::string HunspellImpl::get_xml_par(const char* par) { if (end == '>') end = '<'; else if (end != '\'' && end != '"') - return 0; // bad XML + return dest; // bad XML for (par++; *par != '\0' && *par != end; ++par) { dest.push_back(*par); } @@ -1707,14 +1704,17 @@ bool HunspellImpl::input_conv(const std::string& word, std::string& dest) { // return the beginning of the element (attr == NULL) or the attribute const char* HunspellImpl::get_xml_pos(const char* s, const char* attr) { const char* end = strchr(s, '>'); - const char* p = s; if (attr == NULL) return end; - do { + const char* p = s; + while (1) { p = strstr(p, attr); if (!p || p >= end) return 0; - } while (*(p - 1) != ' ' && *(p - 1) != '\n'); + if (*(p - 1) == ' ' || *(p - 1) == '\n') + break; + p += strlen(attr); + } return p + strlen(attr); } diff --git a/extensions/spellcheck/hunspell/src/hunspell.hxx b/extensions/spellcheck/hunspell/src/hunspell.hxx index 43af66b5ac13..a06bdd43ab4d 100644 --- a/extensions/spellcheck/hunspell/src/hunspell.hxx +++ b/extensions/spellcheck/hunspell/src/hunspell.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/hunzip.cxx b/extensions/spellcheck/hunspell/src/hunzip.cxx index b96d06aac279..8962b100b1f7 100644 --- a/extensions/spellcheck/hunspell/src/hunzip.cxx +++ b/extensions/spellcheck/hunspell/src/hunzip.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/hunzip.hxx b/extensions/spellcheck/hunspell/src/hunzip.hxx index f4b02bff95b9..ea2bc58d2641 100644 --- a/extensions/spellcheck/hunspell/src/hunzip.hxx +++ b/extensions/spellcheck/hunspell/src/hunzip.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/langnum.hxx b/extensions/spellcheck/hunspell/src/langnum.hxx index ef53f4ee1b1f..a64d3d7869d4 100644 --- a/extensions/spellcheck/hunspell/src/langnum.hxx +++ b/extensions/spellcheck/hunspell/src/langnum.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/phonet.cxx b/extensions/spellcheck/hunspell/src/phonet.cxx index b97bbe885c19..69601a2872a5 100644 --- a/extensions/spellcheck/hunspell/src/phonet.cxx +++ b/extensions/spellcheck/hunspell/src/phonet.cxx @@ -254,8 +254,7 @@ std::string phonet(const std::string& inword, phonetable& parms) { } /** end of while (parms.rules[n][0] == c) **/ } /** end of if (n >= 0) **/ if (z0 == 0) { - if (k && !p0 && target.size() < len && c != '\0' && - (1 || target.empty() || target[target.size()-1] != c)) { + if (k && !p0 && target.size() < len && c != '\0') { /** condense only double letters **/ target.push_back(c); /// printf("\n setting \n"); diff --git a/extensions/spellcheck/hunspell/src/replist.cxx b/extensions/spellcheck/hunspell/src/replist.cxx index 89d4caa7400f..cabe382bfd45 100644 --- a/extensions/spellcheck/hunspell/src/replist.cxx +++ b/extensions/spellcheck/hunspell/src/replist.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -102,23 +99,20 @@ replentry* RepList::item(int n) { int RepList::find(const char* word) { int p1 = 0; int p2 = pos - 1; + int ret = -1; while (p1 <= p2) { - int m = (p1 + p2) / 2; + int m = ((unsigned)p1 + (unsigned)p2) >> 1; int c = strncmp(word, dat[m]->pattern.c_str(), dat[m]->pattern.size()); if (c < 0) p2 = m - 1; else if (c > 0) p1 = m + 1; - else { // scan back for a longer match - for (p1 = m - 1; p1 >= 0; --p1) - if (!strncmp(word, dat[p1]->pattern.c_str(), dat[p1]->pattern.size())) - m = p1; - else if (dat[p1]->pattern.size() < dat[m]->pattern.size()) - break; - return m; + else { // scan in the right half for a longer match + ret = m; + p1 = m + 1; } } - return -1; + return ret; } std::string RepList::replace(const char* word, int ind, bool atstart) { @@ -169,20 +163,11 @@ int RepList::add(const std::string& in_pat1, const std::string& pat2) { // sort to the right place in the list int i; for (i = pos - 1; i > 0; i--) { - int c = strncmp(r->pattern.c_str(), dat[i-1]->pattern.c_str(), dat[i-1]->pattern.size()); - if (c > 0) + if (strcmp(r->pattern.c_str(), dat[i - 1]->pattern.c_str()) < 0) { + dat[i] = dat[i - 1]; + } else break; - else if (c == 0) { // subpatterns match. Patterns can't be identical since would catch earlier - for (int j = i - 2; j > 0 && !strncmp(dat[i-1]->pattern.c_str(), dat[j]->pattern.c_str(), dat[i-1]->pattern.size()); --j) - if (dat[j]->pattern.size() > r->pattern.size() || - (dat[j]->pattern.size() == r->pattern.size() && strncmp(dat[j]->pattern.c_str(), r->pattern.c_str(), r->pattern.size()) > 0)) { - i = j; - break; - } - break; - } } - memmove(dat + i + 1, dat + i, (pos - i - 1) * sizeof(replentry *)); dat[i] = r; return 0; } diff --git a/extensions/spellcheck/hunspell/src/replist.hxx b/extensions/spellcheck/hunspell/src/replist.hxx index 2f9d350c3842..1e3efa413177 100644 --- a/extensions/spellcheck/hunspell/src/replist.hxx +++ b/extensions/spellcheck/hunspell/src/replist.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.cxx b/extensions/spellcheck/hunspell/src/suggestmgr.cxx index b9983417cb20..73ea91e3a3a3 100644 --- a/extensions/spellcheck/hunspell/src/suggestmgr.cxx +++ b/extensions/spellcheck/hunspell/src/suggestmgr.cxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, @@ -1075,10 +1072,8 @@ void SuggestMgr::ngsuggest(std::vector& wlst, u8_u16(w_target, target); } - std::vector w_entry; std::string f; std::vector w_f; - std::vector w_target2; for (size_t i = 0; i < rHMgr.size(); ++i) { while (0 != (hp = rHMgr[i]->walk_hashtable(col, hp))) { @@ -1091,13 +1086,23 @@ void SuggestMgr::ngsuggest(std::vector& wlst, continue; if (utf8) { - w_entry.clear(); - u8_u16(w_entry, HENTRY_WORD(hp)); - sc = ngram(3, w_word, w_entry, NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(w_word, w_entry); + u8_u16(w_f, HENTRY_WORD(hp)); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + sc = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; } else { - sc = ngram(3, word, HENTRY_WORD(hp), NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(word, HENTRY_WORD(hp)); + f.assign(HENTRY_WORD(hp)); + + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + mkallsmall(f, csconv); + } + sc = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; } // check special pronounciation @@ -1106,13 +1111,21 @@ void SuggestMgr::ngsuggest(std::vector& wlst, copy_field(f, HENTRY_DATA(hp), MORPH_PHON)) { int sc2; if (utf8) { - w_f.clear(); - u8_u16(w_f, f.c_str()); - sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(w_word, w_f); + u8_u16(w_f, f); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + sc2 = ngram(3, w_word, w_f, NGRAM_LONGER_WORSE) + leftcommon; } else { - sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE + low) + - leftcommonsubstring(word, f.c_str()); + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + mkallsmall(f, csconv); + } + sc2 = ngram(3, word, f, NGRAM_LONGER_WORSE) + leftcommon; } if (sc2 > sc) sc = sc2; @@ -1121,7 +1134,6 @@ void SuggestMgr::ngsuggest(std::vector& wlst, int scphon = -20000; if (ph && (sc > 2) && (abs(n - (int)hp->clen) <= 3)) { if (utf8) { - w_candidate.clear(); u8_u16(w_candidate, HENTRY_WORD(hp)); mkallcap_utf(w_candidate, langnum); u16_u8(candidate, w_candidate); @@ -1129,14 +1141,13 @@ void SuggestMgr::ngsuggest(std::vector& wlst, candidate = HENTRY_WORD(hp); mkallcap(candidate, csconv); } - std::string target2 = phonet(candidate, *ph); - w_target2.clear(); + f = phonet(candidate, *ph); if (utf8) { - u8_u16(w_target2, target2.c_str()); - scphon = 2 * ngram(3, w_target, w_target2, + u8_u16(w_f, f); + scphon = 2 * ngram(3, w_target, w_f, NGRAM_LONGER_WORSE); } else { - scphon = 2 * ngram(3, target, target2, + scphon = 2 * ngram(3, target, f, NGRAM_LONGER_WORSE); } } @@ -1177,12 +1188,24 @@ void SuggestMgr::ngsuggest(std::vector& wlst, w_mw[k].l = '*'; w_mw[k].h = 0; } - thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH + low); + + if (low) { + // lowering dictionary word + mkallsmall_utf(w_mw, langnum); + } + + thresh += ngram(n, w_word, w_mw, NGRAM_ANY_MISMATCH); } else { std::string mw = word; for (int k = sp; k < n; k += 4) mw[k] = '*'; - thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH + low); + + if (low) { + // lowering dictionary word + mkallsmall(mw, csconv); + } + + thresh += ngram(n, word, mw, NGRAM_ANY_MISMATCH); } } thresh = thresh / 3; @@ -1210,7 +1233,6 @@ void SuggestMgr::ngsuggest(std::vector& wlst, return; } - std::vector w_glst_word; for (int i = 0; i < MAX_ROOTS; i++) { if (roots[i]) { struct hentry* rp = roots[i]; @@ -1225,15 +1247,25 @@ void SuggestMgr::ngsuggest(std::vector& wlst, for (int k = 0; k < nw; k++) { if (utf8) { - w_glst_word.clear(); - u8_u16(w_glst_word, glst[k].word); - sc = ngram(n, w_word, w_glst_word, - NGRAM_ANY_MISMATCH + low) + - leftcommonsubstring(w_word, w_glst_word); + u8_u16(w_f, glst[k].word); + + int leftcommon = leftcommonsubstring(w_word, w_f); + if (low) { + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + } + + sc = ngram(n, w_word, w_f, NGRAM_ANY_MISMATCH) + leftcommon; } else { - sc = ngram(n, word, glst[k].word, - NGRAM_ANY_MISMATCH + low) + - leftcommonsubstring(word, glst[k].word); + f = glst[k].word; + + int leftcommon = leftcommonsubstring(word, f.c_str()); + if (low) { + // lowering dictionary word + mkallsmall(f, csconv); + } + + sc = ngram(n, word, f, NGRAM_ANY_MISMATCH) + leftcommon; } if (sc > thresh) { @@ -1295,7 +1327,6 @@ void SuggestMgr::ngsuggest(std::vector& wlst, std::string gl; int len; if (utf8) { - w_gl.clear(); len = u8_u16(w_gl, guess[i]); mkallsmall_utf(w_gl, langnum); u16_u8(gl, w_gl); @@ -1315,22 +1346,39 @@ void SuggestMgr::ngsuggest(std::vector& wlst, } // using 2-gram instead of 3, and other weightening - w_gl.clear(); if (utf8) { u8_u16(w_gl, gl); - re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + - ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); + //w_gl is lowercase already at this point + re = ngram(2, w_word, w_gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + if (low) { + w_f = w_word; + // lowering dictionary word + mkallsmall_utf(w_f, langnum); + re += ngram(2, w_gl, w_f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } else { + re += ngram(2, w_gl, w_word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } } else { - re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED) + - ngram(2, gl, word, NGRAM_ANY_MISMATCH + low + NGRAM_WEIGHTED); + //gl is lowercase already at this point + re = ngram(2, word, gl, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + if (low) { + f = word; + // lowering dictionary word + mkallsmall(f, csconv); + re += ngram(2, gl, f, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } else { + re += ngram(2, gl, word, NGRAM_ANY_MISMATCH + NGRAM_WEIGHTED); + } } int ngram_score, leftcommon_score; if (utf8) { - ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH + low); + //w_gl is lowercase already at this point + ngram_score = ngram(4, w_word, w_gl, NGRAM_ANY_MISMATCH); leftcommon_score = leftcommonsubstring(w_word, w_gl); } else { - ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH + low); + //gl is lowercase already at this point + ngram_score = ngram(4, word, gl, NGRAM_ANY_MISMATCH); leftcommon_score = leftcommonsubstring(word, gl.c_str()); } gscore[i] = @@ -1363,7 +1411,6 @@ void SuggestMgr::ngsuggest(std::vector& wlst, // lowering rootphon[i] std::string gl; int len; - w_gl.clear(); if (utf8) { len = u8_u16(w_gl, rootsphon[i]); mkallsmall_utf(w_gl, langnum); @@ -1675,11 +1722,10 @@ std::string SuggestMgr::suggest_hentry_gen(hentry* rv, const char* pattern) { if (HENTRY_DATA(rv)) p = (char*)strstr(HENTRY_DATA2(rv), MORPH_ALLOMORPH); while (p) { - struct hentry* rv2 = NULL; p += MORPH_TAG_LEN; int plen = fieldlen(p); std::string allomorph(p, plen); - rv2 = pAMgr->lookup(allomorph.c_str()); + struct hentry* rv2 = pAMgr->lookup(allomorph.c_str()); while (rv2) { // if (HENTRY_DATA(rv2) && get_sfxcount(HENTRY_DATA(rv2)) <= // sfxcount) { @@ -1803,14 +1849,6 @@ int SuggestMgr::ngram(int n, l2 = su2.size(); if (l2 == 0) return 0; - // lowering dictionary word - const std::vector* p_su2 = &su2; - std::vector su2_copy; - if (opt & NGRAM_LOWERING) { - su2_copy = su2; - mkallsmall_utf(su2_copy, langnum); - p_su2 = &su2_copy; - } for (int j = 1; j <= n; j++) { ns = 0; for (int i = 0; i <= (l1 - j); i++) { @@ -1818,7 +1856,7 @@ int SuggestMgr::ngram(int n, for (int l = 0; l <= (l2 - j); l++) { for (k = 0; k < j; k++) { const w_char& c1 = su1[i + k]; - const w_char& c2 = (*p_su2)[l + k]; + const w_char& c2 = su2[l + k]; if ((c1.l != c2.l) || (c1.h != c2.h)) break; } @@ -1863,14 +1901,11 @@ int SuggestMgr::ngram(int n, if (l2 == 0) return 0; l1 = s1.size(); - std::string t(s2); - if (opt & NGRAM_LOWERING) - mkallsmall(t, csconv); for (int j = 1; j <= n; j++) { ns = 0; for (int i = 0; i <= (l1 - j); i++) { - //t is haystack, s1[i..i+j) is needle - if (t.find(s1.c_str()+i, 0, j) != std::string::npos) { + //s2 is haystack, s1[i..i+j) is needle + if (s2.find(s1.c_str()+i, 0, j) != std::string::npos) { ns++; } else if (opt & NGRAM_WEIGHTED) { ns--; diff --git a/extensions/spellcheck/hunspell/src/suggestmgr.hxx b/extensions/spellcheck/hunspell/src/suggestmgr.hxx index 6ba9dc8e3703..19ffc03a8403 100644 --- a/extensions/spellcheck/hunspell/src/suggestmgr.hxx +++ b/extensions/spellcheck/hunspell/src/suggestmgr.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád, diff --git a/extensions/spellcheck/hunspell/src/w_char.hxx b/extensions/spellcheck/hunspell/src/w_char.hxx index c561ffc45fdc..5accb7568f15 100644 --- a/extensions/spellcheck/hunspell/src/w_char.hxx +++ b/extensions/spellcheck/hunspell/src/w_char.hxx @@ -1,6 +1,8 @@ /* ***** BEGIN LICENSE BLOCK ***** * Version: MPL 1.1/GPL 2.0/LGPL 2.1 * + * Copyright (C) 2002-2017 Németh László + * * The contents of this file are subject to the Mozilla Public License Version * 1.1 (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at @@ -11,12 +13,7 @@ * for the specific language governing rights and limitations under the * License. * - * The Original Code is Hunspell, based on MySpell. - * - * The Initial Developers of the Original Code are - * Kevin Hendricks (MySpell) and Németh László (Hunspell). - * Portions created by the Initial Developers are Copyright (C) 2002-2005 - * the Initial Developers. All Rights Reserved. + * Hunspell is based on MySpell which is Copyright (C) 2002 Kevin Hendricks. * * Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno, * Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,