Bug 564608 - Update Hunspell to 1.2.11. r=smaug

--HG--
extra : rebase_source : b3e8bccd5b58afb4cff64fa144068f6f64258c68
This commit is contained in:
Ryan VanderMeulen 2010-06-09 08:06:40 +02:00
parent 2977fa50a8
commit 5d9a735086
29 changed files with 860 additions and 837 deletions

View File

@ -63,6 +63,8 @@ CPPSRCS += affentry.cpp \
replist.cpp \
suggestmgr.cpp \
$(NULL)
DEFINES = -DHUNSPELL_STATIC
endif
EXTRA_DSO_LDOPTS = \

View File

@ -19,6 +19,7 @@
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Ryan VanderMeulen (ryanvm@gmail.com)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -34,7 +35,7 @@
*
******* END LICENSE BLOCK *******
Hunspell Version: 1.2.8
Hunspell Version: 1.2.11
Hunspell Author: László Németh
MySpell Author: Kevin Hendricks & David Einstein

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,34 +55,20 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cctype>
#include <cstdio>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include "affentry.hxx"
#include "csutil.hxx"
#ifndef MOZILLA_CLIENT
#ifndef W32
using namespace std;
#endif
#endif
PfxEntry::PfxEntry(AffixMgr* pmgr, affentry* dp)
{
// register affix manager
pmyMgr = pmgr;
// set up its intial values
// set up its initial values
aflag = dp->aflag; // flag
strip = dp->strip; // string to strip
@ -266,7 +253,7 @@ struct hentry * PfxEntry::checkword(const char * word, int len, char in_compound
//if ((opts & aeXPRODUCT) && in_compound) {
if ((opts & aeXPRODUCT)) {
he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, NULL,
he = pmyMgr->suffix_check(tmpword, tmpl, aeXPRODUCT, this, NULL,
0, NULL, FLAG_NULL, needflag, in_compound);
if (he) return he;
}
@ -315,7 +302,7 @@ struct hentry * PfxEntry::check_twosfx(const char * word, int len,
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this, needflag);
he = pmyMgr->suffix_check_twosfx(tmpword, tmpl, aeXPRODUCT, this, needflag);
if (he) return he;
}
}
@ -363,7 +350,7 @@ char * PfxEntry::check_twosfx_morph(const char * word, int len,
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
return pmyMgr->suffix_check_twosfx_morph(tmpword, tmpl,
aeXPRODUCT, (AffEntry *)this, needflag);
aeXPRODUCT, this, needflag);
}
}
}
@ -447,7 +434,7 @@ char * PfxEntry::check_morph(const char * word, int len, char in_compound, const
// cross checked combined with a suffix
if ((opts & aeXPRODUCT) && (in_compound != IN_CPD_BEGIN)) {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, (AffEntry *)this,
st = pmyMgr->suffix_check_morph(tmpword, tmpl, aeXPRODUCT, this,
FLAG_NULL, needflag);
if (st) {
mystrcat(result, st, MAXLNLEN);
@ -466,7 +453,7 @@ SfxEntry::SfxEntry(AffixMgr * pmgr, affentry* dp)
// register affix manager
pmyMgr = pmgr;
// set up its intial values
// set up its initial values
aflag = dp->aflag; // char flag
strip = dp->strip; // string to strip
appnd = dp->appnd; // string to append
@ -628,14 +615,14 @@ inline int SfxEntry::test_condition(const char * st, const char * beg)
// see if this suffix is present in the word
struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns, const FLAG cclass, const FLAG needflag,
const FLAG badflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
// but it does not support cross products skip it
@ -686,9 +673,9 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
if ((TESTAFF(he->astr, aflag, he->alen) || (ep && ep->getCont() &&
TESTAFF(ep->getCont(), aflag, ep->getContLen()))) &&
(((optflags & aeXPRODUCT) == 0) ||
TESTAFF(he->astr, ep->getFlag(), he->alen) ||
(ep && TESTAFF(he->astr, ep->getFlag(), he->alen)) ||
// enabled by prefix
((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen))
((contclass) && (ep && TESTAFF(contclass, ep->getFlag(), contclasslen)))
) &&
// handle cont. class
((!cclass) ||
@ -730,13 +717,13 @@ struct hentry * SfxEntry::checkword(const char * word, int len, int optflags,
// see if two-level suffix is present in the word
struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
struct hentry * he; // hash entry pointer
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
PfxEntry* ep = ppfx;
// if this suffix is being cross checked with a prefix
@ -792,12 +779,12 @@ struct hentry * SfxEntry::check_twosfx(const char * word, int len, int optflags,
// see if two-level suffix is present in the word
char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag)
PfxEntry* ppfx, const FLAG needflag)
{
int tmpl; // length of tmpword
unsigned char * cp;
char tmpword[MAXWORDUTF8LEN + 4];
PfxEntry* ep = (PfxEntry *) ppfx;
PfxEntry* ep = ppfx;
char * st;
char result[MAXLNLEN];
@ -845,8 +832,8 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
if ((contclass) && TESTAFF(contclass, ep->getFlag(), contclasslen)) {
st = pmyMgr->suffix_check_morph(tmpword, tmpl, 0, NULL, aflag, needflag);
if (st) {
if (((PfxEntry *) ppfx)->getMorph()) {
mystrcat(result, ((PfxEntry *) ppfx)->getMorph(), MAXLNLEN);
if (ppfx->getMorph()) {
mystrcat(result, ppfx->getMorph(), MAXLNLEN);
mystrcat(result, " ", MAXLNLEN);
}
mystrcat(result,st, MAXLNLEN);
@ -876,10 +863,10 @@ char * SfxEntry::check_twosfx_morph(const char * word, int len, int optflags,
}
// get next homonym with same affix
struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, AffEntry* ppfx,
struct hentry * SfxEntry::get_next_homonym(struct hentry * he, int optflags, PfxEntry* ppfx,
const FLAG cclass, const FLAG needflag)
{
PfxEntry* ep = (PfxEntry *) ppfx;
PfxEntry* ep = ppfx;
FLAG eFlag = ep ? ep->getFlag() : FLAG_NULL;
while (he->next_homonym) {
@ -1026,3 +1013,4 @@ first two affentries for the suffix D described earlier.
#endif

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,13 +58,15 @@
#ifndef _AFFIX_HXX_
#define _AFFIX_HXX_
#include "affixmgr.hxx"
#include "hunvisapi.h"
#include "atypes.hxx"
#include "baseaffix.hxx"
#include "affixmgr.hxx"
/* A Prefix Entry */
class PfxEntry : public AffEntry
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
{
AffixMgr* pmyMgr;
@ -119,7 +122,7 @@ public:
/* A Suffix Entry */
class SfxEntry : public AffEntry
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
{
AffixMgr* pmyMgr;
char * rappnd;
@ -140,16 +143,16 @@ public:
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
struct hentry * checkword(const char * word, int len, int optflags,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
struct hentry * check_twosfx(const char * word, int len, int optflags, AffEntry* ppfx, const FLAG needflag = NULL);
struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = NULL);
char * check_twosfx_morph(const char * word, int len, int optflags,
AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
struct hentry * get_next_homonym(struct hentry * he);
struct hentry * get_next_homonym(struct hentry * word, int optflags, AffEntry* ppfx,
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
const FLAG cclass, const FLAG needflag);
@ -186,3 +189,5 @@ public:
};
#endif

File diff suppressed because it is too large Load Diff

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,14 +58,9 @@
#ifndef _AFFIXMGR_HXX_
#define _AFFIXMGR_HXX_
#ifdef MOZILLA_CLIENT
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
#include "hunvisapi.h"
#include <stdio.h>
#else
#include <cstdio>
#endif
#include "atypes.hxx"
#include "baseaffix.hxx"
@ -76,13 +72,16 @@ using namespace std;
#define dupSFX (1 << 0)
#define dupPFX (1 << 1)
class AffixMgr
class PfxEntry;
class SfxEntry;
class LIBHUNSPELL_DLL_EXPORTED AffixMgr
{
AffEntry * pStart[SETSIZE];
AffEntry * sStart[SETSIZE];
AffEntry * pFlag[SETSIZE];
AffEntry * sFlag[SETSIZE];
PfxEntry * pStart[SETSIZE];
SfxEntry * sStart[SETSIZE];
PfxEntry * pFlag[SETSIZE];
SfxEntry * sFlag[SETSIZE];
HashMgr * pHMgr;
HashMgr ** alldic;
int * maxdic;
@ -135,8 +134,8 @@ class AffixMgr
const char * sfxappnd; // BUG: not stateless
FLAG sfxflag; // BUG: not stateless
char * derived; // BUG: not stateless
AffEntry * sfx; // BUG: not stateless
AffEntry * pfx; // BUG: not stateless
SfxEntry * sfx; // BUG: not stateless
PfxEntry * pfx; // BUG: not stateless
int checknum;
char * wordchars;
unsigned short * wordchars_utf16;
@ -173,24 +172,24 @@ public:
char in_compound, const FLAG needflag = FLAG_NULL);
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
struct hentry * suffix_check(const char * word, int len, int sfxopts,
AffEntry* ppfx, char ** wlst, int maxSug, int * ns,
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
char in_compound = IN_CPD_NOT);
struct hentry * suffix_check_twosfx(const char * word, int len,
int sfxopts, AffEntry* ppfx, const FLAG needflag = FLAG_NULL);
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
char * affix_check_morph(const char * word, int len,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_morph (const char * word, int len, int sfxopts,
AffEntry * ppfx, const FLAG cclass = FLAG_NULL,
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
char * prefix_check_twosfx_morph(const char * word, int len,
char in_compound, const FLAG needflag = FLAG_NULL);
char * suffix_check_twosfx_morph(const char * word, int len,
int sfxopts, AffEntry * ppfx, const FLAG needflag = FLAG_NULL);
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
char * morphgen(char * ts, int wl, const unsigned short * ap,
unsigned short al, char * morph, char * targetmorph, int level);
@ -216,49 +215,48 @@ public:
char hu_mov_rule, char ** result, char * partresult);
struct hentry * lookup(const char * word);
int get_numrep();
struct replentry * get_reptable();
RepList * get_iconvtable();
RepList * get_oconvtable();
struct phonetable * get_phonetable();
int get_nummap();
struct mapentry * get_maptable();
int get_numbreak();
char ** get_breaktable();
int get_numrep() const;
struct replentry * get_reptable() const;
RepList * get_iconvtable() const;
RepList * get_oconvtable() const;
struct phonetable * get_phonetable() const;
int get_nummap() const;
struct mapentry * get_maptable() const;
int get_numbreak() const;
char ** get_breaktable() const;
char * get_encoding();
int get_langnum();
int get_langnum() const;
char * get_key_string();
char * get_try_string();
const char * get_wordchars();
unsigned short * get_wordchars_utf16(int * len);
char * get_ignore();
unsigned short * get_ignore_utf16(int * len);
int get_compound();
FLAG get_compoundflag();
FLAG get_compoundbegin();
FLAG get_forbiddenword();
FLAG get_nosuggest();
FLAG get_needaffix();
FLAG get_onlyincompound();
FLAG get_compoundroot();
FLAG get_lemma_present();
int get_checknum();
char * get_possible_root();
const char * get_prefix();
const char * get_suffix();
const char * get_derived();
const char * get_version();
const int have_contclass();
int get_utf8();
int get_complexprefixes();
char * get_suffixed(char );
int get_maxngramsugs();
int get_nosplitsugs();
int get_sugswithdots(void);
FLAG get_keepcase(void);
int get_checksharps(void);
char * encode_flag(unsigned short aflag);
int get_fullstrip();
char * get_try_string() const;
const char * get_wordchars() const;
unsigned short * get_wordchars_utf16(int * len) const;
char * get_ignore() const;
unsigned short * get_ignore_utf16(int * len) const;
int get_compound() const;
FLAG get_compoundflag() const;
FLAG get_compoundbegin() const;
FLAG get_forbiddenword() const;
FLAG get_nosuggest() const;
FLAG get_needaffix() const;
FLAG get_onlyincompound() const;
FLAG get_compoundroot() const;
FLAG get_lemma_present() const;
int get_checknum() const;
const char * get_prefix() const;
const char * get_suffix() const;
const char * get_derived() const;
const char * get_version() const;
int have_contclass() const;
int get_utf8() const;
int get_complexprefixes() const;
char * get_suffixed(char ) const;
int get_maxngramsugs() const;
int get_nosplitsugs() const;
int get_sugswithdots(void) const;
FLAG get_keepcase(void) const;
int get_checksharps(void) const;
char * encode_flag(unsigned short aflag) const;
int get_fullstrip() const;
private:
int parse_file(const char * affpath, const char * key);
@ -277,13 +275,13 @@ private:
void reverse_condition(char *);
void debugflag(char * result, unsigned short flag);
int condlen(char *);
int encodeit(struct affentry * ptr, char * cs);
int build_pfxtree(AffEntry* pfxptr);
int build_sfxtree(AffEntry* sfxptr);
int encodeit(affentry &entry, char * cs);
int build_pfxtree(PfxEntry* pfxptr);
int build_sfxtree(SfxEntry* sfxptr);
int process_pfx_order();
int process_sfx_order();
AffEntry * process_pfx_in_order(AffEntry * ptr, AffEntry * nptr);
AffEntry * process_sfx_in_order(AffEntry * ptr, AffEntry * nptr);
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
int process_pfx_tree_to_list();
int process_sfx_tree_to_list();
int redundant_condition(char, char * strip, int stripl,
@ -291,3 +289,4 @@ private:
};
#endif

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -58,6 +59,7 @@
#define _ATYPES_HXX_
#ifndef HUNSPELL_WARNING
#include <stdio.h>
#ifdef HUNSPELL_WARNING_ON
#define HUNSPELL_WARNING fprintf
#else
@ -133,8 +135,7 @@ struct guessword {
};
struct mapentry {
char * set;
w_char * set_utf16;
char ** set;
int len;
};

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,10 +58,10 @@
#ifndef _BASEAFF_HXX_
#define _BASEAFF_HXX_
class AffEntry
{
public:
#include "hunvisapi.h"
class LIBHUNSPELL_DLL_EXPORTED AffEntry
{
protected:
char * appnd;
char * strip;

View File

@ -18,6 +18,8 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* L. David Baron (dbaron@dbaron.org)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,20 +56,13 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cctype>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include "atypes.hxx"
#include "csutil.hxx"
#include "atypes.hxx"
#include "langnum.hxx"
#ifdef OPENOFFICEORG
@ -93,16 +88,6 @@ static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CI
static NS_DEFINE_CID(kUnicharUtilCID, NS_UNICHARUTIL_CID);
#endif
#ifdef MOZILLA_CLIENT
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
#else
#ifndef W32
using namespace std;
#endif
#endif
static struct unicode_info2 * utf_tbl = NULL;
static int utf_tbl_count = 0; // utf_tbl can be used by multiple Hunspell instances
@ -225,7 +210,7 @@ int u8_u16(w_char * dest, int size, const char * src) {
u8++;
u2++;
}
return u2 - dest;
return (int)(u2 - dest);
}
void flag_qsort(unsigned short flags[], int begin, int end) {
@ -289,11 +274,10 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
*stringp = dp+1;
int nc = (int)((unsigned long)dp - (unsigned long)mp);
*(mp+nc) = '\0';
return mp;
} else {
*stringp = mp + strlen(mp);
return mp;
}
return mp;
}
return NULL;
}
@ -303,13 +287,13 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
{
char * d = NULL;
if (s) {
int sl = strlen(s);
d = (char *) malloc(((sl+1) * sizeof(char)));
int sl = strlen(s)+1;
d = (char *) malloc(sl);
if (d) {
memcpy(d,s,((sl+1)*sizeof(char)));
return d;
memcpy(d,s,sl);
} else {
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
}
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
}
return d;
}
@ -341,12 +325,14 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
char * d = NULL;
if (s) {
int sl = strlen(s);
d = (char *) malloc((sl+1) * sizeof(char));
d = (char *) malloc(sl+1);
if (d) {
const char * p = s + sl - 1;
char * q = d;
while (p >= s) *q++ = *p--;
*q = '\0';
} else {
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
}
}
return d;
@ -356,6 +342,9 @@ int flag_bsearch(unsigned short flags[], unsigned short flag, int length) {
// return number of lines
int line_tok(const char * text, char *** lines, char breakchar) {
int linenum = 0;
if (!text) {
return linenum;
}
char * dup = mystrdup(text);
char * p = strchr(dup, breakchar);
while (p) {
@ -582,7 +571,7 @@ int get_sfxcount(const char * morph)
int fieldlen(const char * r)
{
int n = 0;
while (r && *r != '\t' && *r != '\0' && *r != '\n' && *r != ' ') {
while (r && *r != ' ' && *r != '\t' && *r != '\0' && *r != '\n') {
r++;
n++;
}
@ -721,7 +710,7 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
}
}
// convert null terminated string to have intial capital
// convert null terminated string to have initial capital
void mkinitcap(char * p, const struct cs_info * csconv)
{
if (*p != '\0') *p = csconv[((unsigned char)*p)].cupper;
@ -734,7 +723,7 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
}
// conversion function for protected memory
char * get_stored_pointer(char * s)
char * get_stored_pointer(const char * s)
{
char * p;
memcpy(&p, s, sizeof(char *));
@ -765,7 +754,7 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
*d = '\0';
}
// convert null terminated string to have intial capital using encoding
// convert null terminated string to have initial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding)
{
struct cs_info * csconv = get_current_cs(encoding);
@ -777,7 +766,7 @@ void mkallcap_utf(w_char * u, int nc, int langnum) {
// encodings supported
// supplying isupper, tolower, and toupper
struct cs_info iso1_tbl[] = {
static struct cs_info iso1_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -1037,7 +1026,7 @@ struct cs_info iso1_tbl[] = {
};
struct cs_info iso2_tbl[] = {
static struct cs_info iso2_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -1297,7 +1286,7 @@ struct cs_info iso2_tbl[] = {
};
struct cs_info iso3_tbl[] = {
static struct cs_info iso3_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -1556,7 +1545,7 @@ struct cs_info iso3_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso4_tbl[] = {
static struct cs_info iso4_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -1815,7 +1804,7 @@ struct cs_info iso4_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso5_tbl[] = {
static struct cs_info iso5_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -2074,7 +2063,7 @@ struct cs_info iso5_tbl[] = {
{ 0x00, 0xff, 0xaf }
};
struct cs_info iso6_tbl[] = {
static struct cs_info iso6_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -2333,7 +2322,7 @@ struct cs_info iso6_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso7_tbl[] = {
static struct cs_info iso7_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -2592,7 +2581,7 @@ struct cs_info iso7_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso8_tbl[] = {
static struct cs_info iso8_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -2851,7 +2840,7 @@ struct cs_info iso8_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso9_tbl[] = {
static struct cs_info iso9_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -3110,7 +3099,7 @@ struct cs_info iso9_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso10_tbl[] = {
static struct cs_info iso10_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -3369,7 +3358,7 @@ struct cs_info iso10_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info koi8r_tbl[] = {
static struct cs_info koi8r_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -3628,7 +3617,7 @@ struct cs_info koi8r_tbl[] = {
{ 0x01, 0xdf, 0xff }
};
struct cs_info koi8u_tbl[] = {
static struct cs_info koi8u_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -3887,7 +3876,7 @@ struct cs_info koi8u_tbl[] = {
{ 0x01, 0xdf, 0xff }
};
struct cs_info cp1251_tbl[] = {
static struct cs_info cp1251_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -4146,7 +4135,7 @@ struct cs_info cp1251_tbl[] = {
{ 0x00, 0xff, 0xdf }
};
struct cs_info iso13_tbl[] = {
static struct cs_info iso13_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -4406,7 +4395,7 @@ struct cs_info iso13_tbl[] = {
};
struct cs_info iso14_tbl[] = {
static struct cs_info iso14_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -4665,7 +4654,7 @@ struct cs_info iso14_tbl[] = {
{ 0x00, 0xff, 0xff }
};
struct cs_info iso15_tbl[] = {
static struct cs_info iso15_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -4924,7 +4913,7 @@ struct cs_info iso15_tbl[] = {
{ 0x00, 0xff, 0xbe }
};
struct cs_info iscii_devanagari_tbl[] = {
static struct cs_info iscii_devanagari_tbl[] = {
{ 0x00, 0x00, 0x00 },
{ 0x00, 0x01, 0x01 },
{ 0x00, 0x02, 0x02 },
@ -5243,7 +5232,7 @@ struct cs_info * get_current_cs(const char * es) {
if (NS_FAILED(rv))
return nsnull;
ccs = (struct cs_info *) malloc(256 * sizeof(cs_info));
ccs = new cs_info[256];
for (unsigned int i = 0; i <= 0xff; ++i) {
PRBool success = PR_FALSE;
@ -5316,14 +5305,14 @@ char * get_casechars(const char * enc) {
}
*p = '\0';
#ifdef MOZILLA_CLIENT
delete csconv;
delete [] csconv;
#endif
return mystrdup(expw);
}
struct lang_map lang2enc[] = {
static struct lang_map lang2enc[] = {
{"ar", "UTF-8", LANG_ar},
{"az", "UTF-8", LANG_az},
{"bg", "microsoft-cp1251", LANG_bg},
@ -5379,7 +5368,7 @@ int initialize_utf_tbl() {
if (utf_tbl) return 0;
utf_tbl = (unicode_info2 *) malloc(CONTSIZE * sizeof(unicode_info2));
if (utf_tbl) {
int j;
size_t j;
for (j = 0; j < CONTSIZE; j++) {
utf_tbl[j].cletter = 0;
utf_tbl[j].clower = (unsigned short) j;

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,9 +58,13 @@
#ifndef __CSUTILHXX__
#define __CSUTILHXX__
#include "hunvisapi.h"
// First some base level utility routines
#include <string.h>
#include "w_char.hxx"
#include "htypes.hxx"
// casing
#define NOCAP 0
@ -100,72 +105,62 @@
#define FORBIDDENWORD 65510
#define ONLYUPCASEFLAG 65511
// hash entry macros
#define HENTRY_DATA(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : NULL)
// NULL-free version for warning-free OOo build
#define HENTRY_DATA2(h) (h->var ? ((h->var & H_OPT_ALIASM) ? \
get_stored_pointer(&(h->word) + h->blen + 1) : &(h->word) + h->blen + 1) : "")
#define HENTRY_FIND(h,p) (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL)
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
// convert UTF-16 characters to UTF-8
char * u16_u8(char * dest, int size, const w_char * src, int srclen);
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
// convert UTF-8 characters to UTF-16
int u8_u16(w_char * dest, int size, const char * src);
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
// sort 2-byte vector
void flag_qsort(unsigned short flags[], int begin, int end);
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
// binary search in 2-byte vector
int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
// remove end of line char(s)
void mychomp(char * s);
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
// duplicate string
char * mystrdup(const char * s);
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
// strcat for limited length destination string
char * mystrcat(char * dest, const char * st, int max);
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
// duplicate reverse of string
char * myrevstrdup(const char * s);
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
// parse into tokens with char delimiter
char * mystrsep(char ** sptr, const char delim);
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
// parse into tokens with char delimiter
char * mystrsep2(char ** sptr, const char delim);
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
// parse into tokens with char delimiter
char * mystrrep(char *, const char *, const char *);
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
// append s to ends of every lines in text
void strlinecat(char * lines, const char * s);
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
// tokenize into lines with new line
int line_tok(const char * text, char *** lines, char breakchar);
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
// tokenize into lines with new line and uniq in place
char * line_uniq(char * text, char breakchar);
char * line_uniq_app(char ** text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
// change oldchar to newchar in place
char * tr(char * text, char oldc, char newc);
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
// reverse word
int reverseword(char *);
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
// reverse word
int reverseword_utf(char *);
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
// remove duplicates
int uniqlist(char ** list, int n);
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
// free character array list
void freelist(char *** list, int n);
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
// character encoding information
struct cs_info {
@ -187,11 +182,11 @@ struct unicode_info2 {
unsigned short clower;
};
int initialize_utf_tbl();
void free_utf_tbl();
unsigned short unicodetoupper(unsigned short c, int langnum);
unsigned short unicodetolower(unsigned short c, int langnum);
int unicodeisalpha(unsigned short c);
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
struct enc_entry {
const char * enc_name;
@ -206,68 +201,101 @@ struct lang_map {
int num;
};
struct cs_info * get_current_cs(const char * es);
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
const char * get_default_enc(const char * lang);
LIBHUNSPELL_DLL_EXPORTED const char * get_default_enc(const char * lang);
// get language identifiers of language codes
int get_lang_num(const char * lang);
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
// get characters of the given 8bit encoding with lower- and uppercase forms
char * get_casechars(const char * enc);
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
// convert null terminated string to all caps using encoding
void enmkallcap(char * d, const char * p, const char * encoding);
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all little using encoding
void enmkallsmall(char * d, const char * p, const char * encoding);
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
// convert null terminated string to have intial capital using encoding
void enmkinitcap(char * d, const char * p, const char * encoding);
// convert null terminated string to have initial capital using encoding
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
// convert null terminated string to all caps
void mkallcap(char * p, const struct cs_info * csconv);
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
// convert null terminated string to all little
void mkallsmall(char * p, const struct cs_info * csconv);
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
// convert null terminated string to have intial capital
void mkinitcap(char * p, const struct cs_info * csconv);
// convert null terminated string to have initial capital
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
// convert first nc characters of UTF-8 string to little
void mkallsmall_utf(w_char * u, int nc, int langnum);
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
// convert first nc characters of UTF-8 string to capital
void mkallcap_utf(w_char * u, int nc, int langnum);
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
// get type of capitalization
int get_captype(char * q, int nl, cs_info *);
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
// get type of capitalization (UTF-8)
int get_captype_utf8(w_char * q, int nl, int langnum);
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
// strip all ignored characters in the string
void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
// strip all ignored characters in the string
void remove_ignored_chars(char * word, char * ignored_chars);
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
int parse_string(char * line, char ** out, int ln);
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
int parse_array(char * line, char ** out, unsigned short ** out_utf16,
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
int * out_utf16_len, int utf8, int ln);
int fieldlen(const char * r);
char * copy_field(char * dest, const char * morph, const char * var);
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
int morphcmp(const char * s, const char * t);
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
int get_sfxcount(const char * morph);
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
// conversion function for protected memory
void store_pointer(char * dest, char * source);
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
// conversion function for protected memory
char * get_stored_pointer(char * s);
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
// hash entry macros
// Returns the extra data kept behind the word of a hash entry.
//  - no variable fields (h->var == 0): NULL
//  - H_OPT_ALIASM set: the bytes behind the word are handed to
//    get_stored_pointer() and its result is returned
//  - otherwise: the address blen + 1 bytes past the start of h->word
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
{
    if (!h->var) return NULL;
    // position just behind the word (blen bytes plus one more)
    char * tail = &(h->word[0]) + h->blen + 1;
    return (h->var & H_OPT_ALIASM) ? get_stored_pointer(tail) : tail;
}
// NULL-free version for warning-free OOo build
// Same lookup as HENTRY_DATA, but for const entries and never NULL:
// an entry without variable fields (h->var == 0) yields the empty string.
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
{
    if (!h->var) return "";
    // position just behind the word (blen bytes plus one more)
    const char * tail = &(h->word[0]) + h->blen + 1;
    if (h->var & H_OPT_ALIASM) return get_stored_pointer(tail);
    return tail;
}
// Searches the data of a hash entry (see HENTRY_DATA) for the substring p.
// Returns the strstr() match, or NULL when the entry has no data or p does
// not occur in it.
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
{
    char * data = HENTRY_DATA(h);
    if (!data) return NULL;
    return strstr(data, p);
}
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
#endif

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -31,38 +32,22 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cctype>
#else
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include <stdio.h>
#include "dictmgr.hxx"
#ifndef MOZILLA_CLIENT
#ifndef W32
using namespace std;
#endif
#endif
DictMgr::DictMgr(const char * dictpath, const char * etype)
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
{
// load list of etype entries
numdict = 0;
pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
if (pdentry) {
if (parse_file(dictpath, etype)) {
numdict = 0;
// no dictionary.lst found is okay
}
} else {
numdict = 0;
}
}
@ -144,6 +129,16 @@ int DictMgr::parse_file(const char * dictpath, const char * etype)
numdict++;
pdict++;
} else {
switch (i) {
case 3:
free(pdict->region);
pdict->region=NULL;
case 2: //deliberate fallthrough
free(pdict->lang);
pdict->lang=NULL;
default:
break;
}
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
fflush(stderr);
}
@ -181,7 +176,6 @@ char * DictMgr::mystrsep(char ** stringp, const char delim)
if (rv) {
memcpy(rv,mp,nc);
*(rv+nc) = '\0';
return rv;
}
} else {
rv = (char *) malloc(n+1);
@ -189,11 +183,10 @@ char * DictMgr::mystrsep(char ** stringp, const char delim)
memcpy(rv, mp, n);
*(rv+n) = '\0';
*stringp = mp + n;
return rv;
}
}
}
return NULL;
return rv;
}
@ -202,9 +195,9 @@ char * DictMgr::mystrdup(const char * s)
{
char * d = NULL;
if (s) {
int sl = strlen(s);
d = (char *) malloc(((sl+1) * sizeof(char)));
if (d) memcpy(d,s,((sl+1)*sizeof(char)));
int sl = strlen(s)+1;
d = (char *) malloc(sl);
if (d) memcpy(d,s,sl);
}
return d;
}
@ -217,3 +210,4 @@ void DictMgr:: mychomp(char * s)
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
}

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -34,6 +35,8 @@
#ifndef _DICTMGR_HXX_
#define _DICTMGR_HXX_
#include "hunvisapi.h"
#define MAXDICTIONARIES 100
#define MAXDICTENTRYLEN 1024
@ -44,7 +47,7 @@ struct dictentry {
};
class DictMgr
class LIBHUNSPELL_DLL_EXPORTED DictMgr
{
int numdict;

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -31,15 +32,9 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#else
#include <stdlib.h>
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#endif
#include <stdio.h>
#include "filemgr.hxx"
@ -54,11 +49,12 @@ FileMgr::FileMgr(const char * file, const char * key) {
fin = fopen(file, "r");
if (!fin) {
// check hzipped file
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION));
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
if (st) {
strcpy(st, file);
strcat(st, HZIP_EXTENSION);
hin = new Hunzip(st, key);
free(st);
}
}
if (!fin && !hin) fail(MSG_OPEN, file);

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -34,9 +35,12 @@
#ifndef _FILEMGR_HXX_
#define _FILEMGR_HXX_
#include "hunzip.hxx"
#include "hunvisapi.h"
class FileMgr
#include "hunzip.hxx"
#include <stdio.h>
class LIBHUNSPELL_DLL_EXPORTED FileMgr
{
protected:
FILE * fin;

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,31 +55,14 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cctype>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include "atypes.hxx"
#include "csutil.hxx"
#include "hashmgr.hxx"
#ifdef MOZILLA_CLIENT
#ifdef __SUNPRO_CC // for SunONE Studio compiler
using namespace std;
#endif
#else
#ifndef W32
using namespace std;
#endif
#endif
#include "csutil.hxx"
#include "atypes.hxx"
// build a hash table from a munched word list
@ -160,6 +144,10 @@ HashMgr::~HashMgr()
if (ignorechars) free(ignorechars);
if (ignorechars_utf16) free(ignorechars_utf16);
#ifdef MOZILLA_CLIENT
delete [] csconv;
#endif
}
// lookup a root word in the hashtable
@ -171,7 +159,7 @@ struct hentry * HashMgr::lookup(const char *word) const
dp = tableptr[hash(word)];
if (!dp) return NULL;
for ( ; dp != NULL; dp = dp->next) {
if (strcmp(word,&(dp->word)) == 0) return dp;
if (strcmp(word, dp->word) == 0) return dp;
}
}
return NULL;
@ -187,7 +175,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
struct hentry* hp =
(struct hentry *) malloc (sizeof(struct hentry) + wbl + descl);
if (!hp) return 1;
char * hpw = &(hp->word);
char * hpw = hp->word;
strcpy(hpw, word);
if (ignorechars != NULL) {
if (utf8) {
@ -231,7 +219,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
return 0;
}
while (dp->next != NULL) {
if ((!dp->next_homonym) && (strcmp(&(hp->word), &(dp->word)) == 0)) {
if ((!dp->next_homonym) && (strcmp(hp->word, dp->word) == 0)) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
@ -249,7 +237,7 @@ int HashMgr::add_word(const char * word, int wbl, int wcl, unsigned short * aff,
}
dp=dp->next;
}
if (strcmp(&(hp->word), &(dp->word)) == 0) {
if (strcmp(hp->word, dp->word) == 0) {
// remove hidden onlyupcase homonym
if (!onlyupcase) {
if ((dp->astr) && TESTAFF(dp->astr, ONLYUPCASEFLAG, dp->alen)) {
@ -326,7 +314,7 @@ int HashMgr::remove(const char * word)
while (dp) {
if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
unsigned short * flags =
(unsigned short *) malloc(sizeof(short *) * (dp->alen + 1));
(unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
if (!flags) return 1;
for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
flags[dp->alen] = forbiddenword;
@ -348,7 +336,7 @@ int HashMgr::remove_forbidden_flag(const char * word) {
if (dp->alen == 1) dp->alen = 0; // XXX forbidden words of personal dic.
else {
unsigned short * flags2 =
(unsigned short *) malloc(sizeof(short *) * (dp->alen - 1));
(unsigned short *) malloc(sizeof(short) * (dp->alen - 1));
if (!flags2) return 1;
int i, j = 0;
for (i = 0; i < dp->alen; i++) {
@ -439,7 +427,7 @@ int HashMgr::load_tables(const char * tpath, const char * key)
/* remove byte order mark */
if (strncmp(ts,"\xEF\xBB\xBF",3) == 0) {
memmove(ts, ts+3, strlen(ts+3)+1);
HUNSPELL_WARNING(stderr, "warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions\n");
// warning: dic file begins with byte order mark: possible incompatibility with old Hunspell versions
}
tablesize = atoi(ts);
@ -512,6 +500,10 @@ int HashMgr::load_tables(const char * tpath, const char * key)
}
} else {
al = decode_flags(&flags, ap + 1, dict);
if (al == -1) {
HUNSPELL_WARNING(stderr, "Can't allocate memory.\n");
return 6;
}
flag_qsort(flags, 0, al);
}
} else {
@ -552,6 +544,11 @@ int HashMgr::hash(const char * word) const
int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af) {
int len;
if (*flags == '\0') {
HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
*result = NULL;
return 0;
}
switch (flag_mode) {
case FLAG_LONG: { // two-character flags (1x2yZz -> 1x 2y Zz)
len = strlen(flags);

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,18 +58,16 @@
#ifndef _HASHMGR_HXX_
#define _HASHMGR_HXX_
#ifndef MOZILLA_CLIENT
#include <cstdio>
#else
#include <stdio.h>
#endif
#include "hunvisapi.h"
#include <stdio.h>
#include "filemgr.hxx"
#include "htypes.hxx"
#include "filemgr.hxx"
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
class HashMgr
class LIBHUNSPELL_DLL_EXPORTED HashMgr
{
int tablesize;
struct hentry ** tableptr;

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -68,7 +69,7 @@
#define H_OPT_PHON (1 << 2)
// see also csutil.hxx
#define HENTRY_WORD(h) &(h->word)
#define HENTRY_WORD(h) (h->word)
// approx. number of user defined words
#define USERWORD 1000
@ -82,7 +83,7 @@ struct hentry
struct hentry * next; // next word with same hash code
struct hentry * next_homonym; // next homonym word (with same hash code)
char var; // variable fields (only for special pronounciation yet)
char word; // variable-length word (8-bit or UTF-8 encoding)
char word[1]; // variable-length word (8-bit or UTF-8 encoding)
};
#endif

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,25 +55,16 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#endif
#include "csutil.hxx"
#include "hunspell.h"
#include "hunspell.hxx"
#include "hunspell.h"
#ifndef MOZILLA_CLIENT
#ifndef W32
using namespace std;
#endif
# include "config.h"
#endif
#include "csutil.hxx"
Hunspell::Hunspell(const char * affpath, const char * dpath, const char * key)
{
@ -115,7 +107,7 @@ Hunspell::~Hunspell()
pSMgr = NULL;
pAMgr = NULL;
#ifdef MOZILLA_CLIENT
free(csconv);
delete [] csconv;
#endif
csconv= NULL;
if (encoding) free(encoding);
@ -454,21 +446,24 @@ int Hunspell::spell(const char * word, int * info, char ** root)
// prefixes separated by apostrophe (SANT'ELIA -> Sant'+Elia).
if (pAMgr && strchr(cw, '\'')) {
wl = mkallsmall2(cw, unicw, nc);
char * apostrophe = strchr(cw, '\'');
if (utf8) {
w_char tmpword[MAXWORDLEN];
*apostrophe = '\0';
wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
*apostrophe = '\'';
if (wl2 < nc) {
mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
rv = checkword(cw, info, root);
if (rv) break;
//There are no really sane circumstances where this could fail,
//but anyway...
if (char * apostrophe = strchr(cw, '\'')) {
if (utf8) {
w_char tmpword[MAXWORDLEN];
*apostrophe = '\0';
wl2 = u8_u16(tmpword, MAXWORDLEN, cw);
*apostrophe = '\'';
if (wl2 < nc) {
mkinitcap2(apostrophe + 1, unicw + wl2 + 1, nc - wl2 - 1);
rv = checkword(cw, info, root);
if (rv) break;
}
} else {
mkinitcap2(apostrophe + 1, unicw, nc);
rv = checkword(cw, info, root);
if (rv) break;
}
} else {
mkinitcap2(apostrophe + 1, unicw, nc);
rv = checkword(cw, info, root);
if (rv) break;
}
mkinitcap2(cw, unicw, nc);
rv = checkword(cw, info, root);
@ -548,9 +543,23 @@ int Hunspell::spell(const char * word, int * info, char ** root)
if (wordbreak) {
char * s;
char r;
int corr = 0;
int nbr = 0;
wl = strlen(cw);
int numbreak = pAMgr ? pAMgr->get_numbreak() : 0;
// calculate break points for recursion limit
for (int j = 0; j < numbreak; j++) {
s = cw;
do {
s = (char *) strstr(s, wordbreak[j]);
if (s) {
nbr++;
s++;
}
} while (s);
}
if (nbr >= 10) return 0;
// check boundary patterns (^begin and end$)
for (int j = 0; j < numbreak; j++) {
int plen = strlen(wordbreak[j]);
@ -565,9 +574,9 @@ int Hunspell::spell(const char * word, int * info, char ** root)
cw[wl - plen + 1] = r;
}
}
// other patterns
for (int j = 0; j < numbreak; j++) {
int result = 0;
int plen = strlen(wordbreak[j]);
s=(char *) strstr(cw, wordbreak[j]);
if (s && (s > cw) && (s < cw + wl - plen)) {
@ -669,7 +678,7 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
return NULL;
}
if (root) {
*root = mystrdup(&(he->word));
*root = mystrdup(he->word);
if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
@ -688,7 +697,7 @@ struct hentry * Hunspell::checkword(const char * w, int * info, char ** root)
// end of LANG speficic region
if (he) {
if (root) {
*root = mystrdup(&(he->word));
*root = mystrdup(he->word);
if (*root && complexprefixes) {
if (utf8) reverseword_utf(*root); else reverseword(*root);
}
@ -866,7 +875,7 @@ int Hunspell::suggest(char*** slst, const char * word)
// END OF LANG_hu section
// try ngram approach since found nothing
if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0)) {
if ((ns == 0 || onlycmpdsug) && pAMgr && (pAMgr->get_maxngramsugs() != 0) && (*slst)) {
switch(captype) {
case NOCAP: {
ns = pSMgr->ngsuggest(*slst, cw, ns, pHMgr, maxdic);
@ -900,15 +909,16 @@ int Hunspell::suggest(char*** slst, const char * word)
}
// try dash suggestion (Afo-American -> Afro-American)
if (strchr(cw, '-')) {
char * pos = strchr(cw, '-');
if (char * pos = strchr(cw, '-')) {
char * ppos = cw;
int nodashsug = 1;
char ** nlst = NULL;
int nn = 0;
int last = 0;
for (int j = 0; j < ns && nodashsug == 1; j++) {
if (strchr((*slst)[j], '-')) nodashsug = 0;
if (*slst) {
for (int j = 0; j < ns && nodashsug == 1; j++) {
if (strchr((*slst)[j], '-')) nodashsug = 0;
}
}
while (nodashsug && !last) {
if (*pos == '\0') last = 1; else *pos = '\0';
@ -1517,7 +1527,10 @@ int Hunspell::analyze(char*** slst, const char * word)
*dash='\0';
// examine 2 sides of the dash
if (dash[1] == '\0') { // base word ending with dash
if (spell(cw)) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
if (spell(cw)) {
char * p = pSMgr->suggest_morph(cw);
if (p) return line_tok(pSMgr->suggest_morph(cw), slst, MSEP_REC);
}
} else if ((dash[1] == 'e') && (dash[2] == '\0')) { // XXX (HU) -e hat.
if (spell(cw) && (spell("-e"))) {
st = pSMgr->suggest_morph(cw);
@ -1660,7 +1673,12 @@ int Hunspell::get_xml_par(char * dest, const char * par, int max)
*d = '\0';
mystrrep(dest, "&lt;", "<");
mystrrep(dest, "&amp;", "&");
return d - dest;
return (int)(d - dest);
}
int Hunspell::get_langnum() const
{
return langnum;
}
// return the beginning of the element (attr == NULL) or the attribute
@ -1693,9 +1711,12 @@ int Hunspell::get_xml_list(char ***slst, char * list, const char * tag) {
if (!*slst) return 0;
for (p = list, n = 0; (p = strstr(p, tag)); p++, n++) {
int l = strlen(p);
(*slst)[n] = (char *) malloc(l);
(*slst)[n] = (char *) malloc(l + 1);
if (!(*slst)[n]) return (n > 0 ? n - 1 : 0);
get_xml_par((*slst)[n], p + strlen(tag) - 1, l);
if (!get_xml_par((*slst)[n], p + strlen(tag) - 1, l)) {
free((*slst)[n]);
break;
}
}
return n;
}
@ -1712,7 +1733,7 @@ int Hunspell::spellml(char*** slst, const char * word)
if (!q2) return 0; // bad XML input
if (check_xml_par(q, "type=", "analyze")) {
int n = 0, s = 0;
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) n = analyze(slst, cw);
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 10)) n = analyze(slst, cw);
if (n == 0) return 0;
// convert the result to <code><a>ana1</a><a>ana2</a></code> format
for (int i = 0; i < n; i++) s+= strlen((*slst)[i]);
@ -1733,13 +1754,13 @@ int Hunspell::spellml(char*** slst, const char * word)
(*slst)[0] = r;
return 1;
} else if (check_xml_par(q, "type=", "stem")) {
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN)) return stem(slst, cw);
if (get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1)) return stem(slst, cw);
} else if (check_xml_par(q, "type=", "generate")) {
int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN);
int n = get_xml_par(cw, strchr(q2, '>'), MAXWORDUTF8LEN - 1);
if (n == 0) return 0;
char * q3 = strstr(q2 + 1, "<word");
if (q3) {
if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN)) {
if (get_xml_par(cw2, strchr(q3, '>'), MAXWORDUTF8LEN - 1)) {
return generate(slst, cw, cw2);
}
} else {
@ -1948,7 +1969,7 @@ int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word)
return ((Hunspell*)pHunspell)->stem(slst, word);
}
int Hunspell_stem(Hunhandle *pHunspell, char*** slst, char** desc, int n)
// C API wrapper: forwards to Hunspell::stem(slst, desc, n) on the object
// behind the opaque handle and returns that call's result.
int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n)
{
    Hunspell * speller = (Hunspell*)pHunspell;
    return speller->stem(slst, desc, n);
}
@ -1959,7 +1980,7 @@ int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
return ((Hunspell*)pHunspell)->generate(slst, word, word2);
}
int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n)
{
return ((Hunspell*)pHunspell)->generate(slst, word, desc, n);
@ -1989,6 +2010,6 @@ int Hunspell_remove(Hunhandle *pHunspell, const char * word) {
return ((Hunspell*)pHunspell)->remove(word);
}
void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n) {
// C API wrapper that releases a string list by forwarding slst and n to
// freelist(). The handle parameter is intentionally unnamed: it is not used
// by the body.
void Hunspell_free_list(Hunhandle *, char *** slst, int n) {
freelist(slst, n);
}

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -57,31 +58,27 @@
#ifndef _MYSPELLMGR_H_
#define _MYSPELLMGR_H_
#include "hunvisapi.h"
#ifdef __cplusplus
extern "C" {
#endif
typedef struct Hunhandle Hunhandle;
#ifdef _MSC_VER
#define DLL __declspec ( dllexport )
#else
#define DLL
#endif
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
DLL Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
DLL Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
const char * key);
DLL void Hunspell_destroy(Hunhandle *pHunspell);
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
/* spell(word) - spellcheck word
* output: 0 = bad word, not 0 = good word
*/
DLL int Hunspell_spell(Hunhandle *pHunspell, const char *);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
/* suggest(suggestions, word) - search suggestions
* input: pointer to an array of strings pointer and the (bad) word
@ -90,17 +87,17 @@ DLL char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
* a newly allocated array of strings (*slst will be NULL when the number
* of suggestions equals 0.)
*/
DLL int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
/* morphological functions */
/* analyze(result, word) - morphological analysis of the word */
DLL int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, word) - stemmer function */
DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
/* stem(result, analysis, n) - get stems from a morph. analysis
* example:
@ -109,11 +106,11 @@ DLL int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
* int n2 = Hunspell_stem2(result2, result, n1);
*/
DLL int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
/* generate(result, word, word2) - morphological generation by example(s) */
DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
const char * word2);
/* generate(result, word, desc, n) - generation by morph. description(s)
@ -124,29 +121,29 @@ DLL int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
*/
DLL int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
char** desc, int n);
/* functions for run-time modification of the dictionary */
/* add word to the run-time dictionary */
DLL int Hunspell_add(Hunhandle *pHunspell, const char * word);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
/* add word to the run-time dictionary with affix flags of
* the example (a dictionary word): Hunspell will recognize
* affixed forms of the new word, too.
*/
DLL int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
/* remove word from the run-time dictionary */
DLL int Hunspell_remove(Hunhandle *pHunspell, const char * word);
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
/* free suggestion lists */
DLL void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
#ifdef __cplusplus
}

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,10 +55,12 @@
*
******* END LICENSE BLOCK *******/
#include "affixmgr.hxx"
#include "hunvisapi.h"
#include "hashmgr.hxx"
#include "langnum.hxx"
#include "affixmgr.hxx"
#include "suggestmgr.hxx"
#include "langnum.hxx"
#define SPELL_COMPOUND (1 << 0)
#define SPELL_FORBIDDEN (1 << 1)
@ -74,21 +77,7 @@
#ifndef _MYSPELLMGR_HXX_
#define _MYSPELLMGR_HXX_
#ifdef HUNSPELL_STATIC
#define DLLEXPORT
#else
#ifdef HUNSPELL_EXPORTS
#define DLLEXPORT __declspec( dllexport )
#else
#define DLLEXPORT __declspec( dllimport )
#endif
#endif
#ifdef W32
class DLLEXPORT Hunspell
#else
class Hunspell
#endif
class LIBHUNSPELL_DLL_EXPORTED Hunspell
{
AffixMgr* pAMgr;
HashMgr* pHMgr[MAXDIC];
@ -200,6 +189,8 @@ public:
struct cs_info * get_csconv();
const char * get_version();
int get_langnum() const;
/* experimental and deprecated functions */
@ -211,7 +202,6 @@ public:
/* spec. suggestions */
int suggest_auto(char*** slst, const char * word);
int suggest_pos_stems(char*** slst, const char * word);
char * get_possible_root();
#endif
private:

View File

@ -0,0 +1,51 @@
/******* BEGIN LICENSE BLOCK *******
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
* The contents of this file are subject to the Mozilla Public License Version
* 1.1 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
* http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS IS" basis,
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
* for the specific language governing rights and limitations under the
* License.
*
* The Initial Developers of the Original Code is Caolan McNamara.
* Portions created by the Initial Developer are Copyright (C) 2010 the
* Initial Developer. All Rights Reserved.
*
* Contributor(s): Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
* in which case the provisions of the GPL or the LGPL are applicable instead
* of those above. If you wish to allow use of your version of this file only
* under the terms of either the GPL or the LGPL, and not to allow others to
* use your version of this file under the terms of the MPL, indicate your
* decision by deleting the provisions above and replace them with the notice
* and other provisions required by the GPL or the LGPL. If you do not delete
* the provisions above, a recipient may use your version of this file under
* the terms of any one of the MPL, the GPL or the LGPL.
*
******* END LICENSE BLOCK *******/
#ifndef _HUNSPELL_VISIBILITY_H_
#define _HUNSPELL_VISIBILITY_H_
/*
 * LIBHUNSPELL_DLL_EXPORTED is the decoration placed on the library's
 * exported classes and functions. It expands to:
 *   - nothing, when HUNSPELL_STATIC is defined;
 *   - __declspec(dllexport) / __declspec(dllimport) under _MSC_VER,
 *     depending on whether BUILDING_LIBHUNSPELL is defined;
 *   - __attribute__((__visibility__("default"))) on other compilers when
 *     BUILDING_LIBHUNSPELL is defined;
 *   - nothing otherwise.
 */
#if defined(HUNSPELL_STATIC)
# define LIBHUNSPELL_DLL_EXPORTED
#elif defined(_MSC_VER)
# if defined(BUILDING_LIBHUNSPELL)
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllexport)
# else
# define LIBHUNSPELL_DLL_EXPORTED __declspec(dllimport)
# endif
/* Test with defined(), like the MSVC branch above: evaluating the bare macro
 * is a preprocessor error when BUILDING_LIBHUNSPELL is defined without a
 * value. */
#elif defined(BUILDING_LIBHUNSPELL) && 1
# define LIBHUNSPELL_DLL_EXPORTED __attribute__((__visibility__("default")))
#else
# define LIBHUNSPELL_DLL_EXPORTED
#endif
#endif

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -31,15 +32,9 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#endif
#include "hunzip.hxx"
@ -62,6 +57,7 @@ Hunzip::Hunzip(const char * file, const char * key) {
inc = 0;
outc = 0;
dec = NULL;
fin = NULL;
filename = (char *) malloc(strlen(file) + 1);
if (filename) strcpy(filename, file);
if (getcode(key) == -1) bufsiz = -1;
@ -74,6 +70,8 @@ int Hunzip::getcode(const char * key) {
int allocatedbit = BASEBITREC;
const char * enc = key;
if (!filename) return -1;
fin = fopen(filename, "rb");
if (!fin) return -1;

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -34,6 +35,10 @@
#ifndef _HUNZIP_HXX_
#define _HUNZIP_HXX_
#include "hunvisapi.h"
#include <stdio.h>
#define BUFSIZE 65536
#define HZIP_EXTENSION ".hz"
@ -47,7 +52,7 @@ struct bit {
int v[2];
};
class Hunzip
class LIBHUNSPELL_DLL_EXPORTED Hunzip
{
protected:

View File

@ -17,6 +17,7 @@
*
* Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -43,17 +44,10 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cctype>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include "csutil.hxx"
#include "phonet.hxx"
@ -76,15 +70,15 @@ void init_phonet_hash(phonetable & parms)
}
}
// like strcpy but safe if the strings overlap
// but only if dest < src
static inline void strmove(char * dest, char * src) {
while (*src)
*dest++ = *src++;
*dest = '\0';
}
// strcpy variant that tolerates overlapping strings, provided dest < src:
// bytes are copied front to back, one at a time, terminator included.
static inline void strmove(char * dest, char * src) {
    char ch;
    do {
        ch = *src++;
        *dest++ = ch;
    } while (ch != '\0');
}
int myisalpha(char ch) {
// Alphabetic test used by the phonetic code: bytes with a value of 128 or
// above always count as letters (result 1); lower values are classified by
// isalpha().
static int myisalpha(char ch) {
    const unsigned char code = (unsigned char) ch;
    return (code >= 128) ? 1 : isalpha(code);
}

View File

@ -17,6 +17,7 @@
*
* Contributor(s): Björn Jacke (bjoern.jacke@gmx.de)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -50,6 +51,8 @@
#define MAXPHONETLEN 256
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
#include "hunvisapi.h"
struct phonetable {
char utf8;
cs_info * lang;
@ -58,9 +61,9 @@ struct phonetable {
int hash[HASHSIZE];
};
void init_phonet_hash(phonetable & parms);
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
int phonet (const char * inword, char * target,
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
int len, phonetable & phone);
#endif

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -31,15 +32,9 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#endif
#include "replist.hxx"
#include "csutil.hxx"
@ -109,7 +104,7 @@ int RepList::conv(const char * word, char * dest) {
int stl = 0;
int change = 0;
// for (int i = 0; i < pos; i++) fprintf(stderr, "%d. %s\n", i, dat[i]->pattern);
for (int i = 0; i < strlen(word); i++) {
for (size_t i = 0; i < strlen(word); i++) {
int n = near(word + i);
int l = match(word + i, n);
if (l) {

View File

@ -16,6 +16,7 @@
* are Copyright (C) 2002-2005 the Initial Developers. All Rights Reserved.
*
* Contributor(s): László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -34,9 +35,12 @@
/* string replacement list class */
#ifndef _REPLIST_HXX_
#define _REPLIST_HXX_
#include "hunvisapi.h"
#include "w_char.hxx"
class RepList
class LIBHUNSPELL_DLL_EXPORTED RepList
{
protected:
replentry ** dat;

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -54,27 +55,14 @@
*
******* END LICENSE BLOCK *******/
#ifndef MOZILLA_CLIENT
#include <cstdlib>
#include <cstring>
#include <cstdio>
#include <cctype>
#else
#include <stdlib.h>
#include <string.h>
#include <stdio.h>
#include <ctype.h>
#endif
#include "csutil.hxx"
#include "htypes.hxx"
#include "suggestmgr.hxx"
#ifndef MOZILLA_CLIENT
#ifndef W32
using namespace std;
#endif
#endif
#include "htypes.hxx"
#include "csutil.hxx"
const w_char W_VLINE = { '\0', '|' };
@ -86,6 +74,8 @@ SuggestMgr::SuggestMgr(const char * tryme, int maxn,
// try when building candidate suggestions
pAMgr = aptr;
csconv = NULL;
ckeyl = 0;
ckey = NULL;
ckey_utf = NULL;
@ -120,6 +110,7 @@ SuggestMgr::SuggestMgr(const char * tryme, int maxn,
ckeyl = u8_u16(t, MAXSWL, ckey);
ckey_utf = (w_char *) malloc(ckeyl * sizeof(w_char));
if (ckey_utf) memcpy(ckey_utf, t, ckeyl * sizeof(w_char));
else ckeyl = 0;
} else {
ckeyl = strlen(ckey);
}
@ -153,6 +144,9 @@ SuggestMgr::~SuggestMgr()
ctry_utf = NULL;
ctryl = 0;
maxSug = 0;
#ifdef MOZILLA_CLIENT
delete [] csconv;
#endif
}
int SuggestMgr::testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
@ -379,9 +373,11 @@ int SuggestMgr::capchars(char** wlst, const char * word, int ns, int cpdsuggest)
// suggestions for when the wrong char was chosen out of a related set
int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
{
char candidate[MAXSWUTF8L];
clock_t timelimit;
int timer;
candidate[0] = '\0';
int wl = strlen(word);
if (wl < 2 || ! pAMgr) return ns;
@ -391,27 +387,22 @@ int SuggestMgr::mapchars(char** wlst, const char * word, int ns, int cpdsuggest)
timelimit = clock();
timer = MINTIMER;
if (utf8) {
w_char w[MAXSWL];
int len = u8_u16(w, MAXSWL, word);
ns = map_related_utf(w, len, 0, cpdsuggest, wlst, ns, maptable, nummap, &timer, &timelimit);
} else ns = map_related(word, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit);
return ns;
return map_related(word, (char *) &candidate, 0, 0, wlst, cpdsuggest, ns, maptable, nummap, &timer, &timelimit);
}
int SuggestMgr::map_related(const char * word, int i, char** wlst,
int cpdsuggest, int ns,
int SuggestMgr::map_related(const char * word, char * candidate, int wn, int cn,
char** wlst, int cpdsuggest, int ns,
const mapentry* maptable, int nummap, int * timer, clock_t * timelimit)
{
char c = *(word + i);
if (c == 0) {
if (*(word + wn) == '\0') {
int cwrd = 1;
int wl = strlen(word);
*(candidate + cn) = '\0';
int wl = strlen(candidate);
for (int m=0; m < ns; m++)
if (strcmp(word,wlst[m]) == 0) cwrd = 0;
if ((cwrd) && checkword(word, wl, cpdsuggest, timer, timelimit)) {
if (strcmp(candidate, wlst[m]) == 0) cwrd = 0;
if ((cwrd) && checkword(candidate, wl, cpdsuggest, timer, timelimit)) {
if (ns < maxSug) {
wlst[ns] = mystrdup(word);
wlst[ns] = mystrdup(candidate);
if (wlst[ns] == NULL) return -1;
ns++;
}
@ -420,72 +411,27 @@ int SuggestMgr::map_related(const char * word, int i, char** wlst,
}
int in_map = 0;
for (int j = 0; j < nummap; j++) {
if (strchr(maptable[j].set,c) != 0) {
in_map = 1;
char * newword = mystrdup(word);
if (!newword) return -1;
for (int k = 0; k < maptable[j].len; k++) {
*(newword + i) = *(maptable[j].set + k);
ns = map_related(newword, (i+1), wlst, cpdsuggest,
ns, maptable, nummap, timer, timelimit);
if (!(*timer)) return ns;
for (int k = 0; k < maptable[j].len; k++) {
int len = strlen(maptable[j].set[k]);
if (strncmp(maptable[j].set[k], word + wn, len) == 0) {
in_map = 1;
for (int l = 0; l < maptable[j].len; l++) {
strcpy(candidate + cn, maptable[j].set[l]);
ns = map_related(word, candidate, wn + len, strlen(candidate), wlst,
cpdsuggest, ns, maptable, nummap, timer, timelimit);
if (!(*timer)) return ns;
}
}
free(newword);
}
}
if (!in_map) {
i++;
ns = map_related(word, i, wlst, cpdsuggest,
*(candidate + cn) = *(word + wn);
ns = map_related(word, candidate, wn + 1, cn + 1, wlst, cpdsuggest,
ns, maptable, nummap, timer, timelimit);
}
return ns;
}
// Recursively generates candidate words by substituting, at each position
// of the w_char buffer `word`, the members of any map entry whose set
// contains the character at that position, and collects the candidates
// accepted by checkword() into wlst.
//
//   word       - w_char buffer of `len` elements; modified in place during
//                the recursion
//   i          - position currently being processed (i == len ends recursion)
//   cpdsuggest - passed through unchanged to checkword()
//   wlst, ns   - suggestion list and its current number of entries
//   maptable   - array of `nummap` map entries (set_utf16 / len are read)
//   timer, timelimit - passed through to checkword(); *timer == 0 aborts
//
// Returns the updated number of entries in wlst, or -1 when mystrdup()
// returns NULL.
int SuggestMgr::map_related_utf(w_char * word, int len, int i, int cpdsuggest,
char** wlst, int ns, const mapentry* maptable, int nummap,
int * timer, clock_t * timelimit)
{
// end of word reached: evaluate the fully built candidate
if (i == len) {
int cwrd = 1;
int wl;
char s[MAXSWUTF8L];
// convert the candidate into the char buffer s for comparison and lookup
u16_u8(s, MAXSWUTF8L, word, len);
wl = strlen(s);
// cwrd is cleared when s is already present in wlst[0..ns)
for (int m=0; m < ns; m++)
if (strcmp(s,wlst[m]) == 0) cwrd = 0;
if ((cwrd) && checkword(s, wl, cpdsuggest, timer, timelimit)) {
// an accepted candidate is only stored while the list is below maxSug
if (ns < maxSug) {
wlst[ns] = mystrdup(s);
if (wlst[ns] == NULL) return -1;
ns++;
}
}
return ns;
}
int in_map = 0;
// remember the original 16-bit value at position i so it can be restored
unsigned short c = *((unsigned short *) word + i);
for (int j = 0; j < nummap; j++) {
// map entry j applies when flag_bsearch reports c in its set_utf16
if (flag_bsearch((unsigned short *) maptable[j].set_utf16, c, maptable[j].len)) {
in_map = 1;
// try every member of the set at position i, then continue with i + 1
for (int k = 0; k < maptable[j].len; k++) {
*(word + i) = *(maptable[j].set_utf16 + k);
// NOTE(review): a -1 returned here is stored in ns and is not checked
// before the next iteration -- confirm this is handled.
ns = map_related_utf(word, len, i + 1, cpdsuggest,
wlst, ns, maptable, nummap, timer, timelimit);
// *timer == 0 aborts immediately; position i keeps the substituted value
if (!(*timer)) return ns;
}
// put the original character back before examining the next map entry
*((unsigned short *) word + i) = c;
}
}
// no map entry contained c: keep the character and move to the next position
if (!in_map) {
i++;
ns = map_related_utf(word, len, i, cpdsuggest,
wlst, ns, maptable, nummap, timer, timelimit);
}
return ns;
}
// suggestions for a typical spelling mistake that
// differs by more than 1 letter from the correct form.
int SuggestMgr::replchars(char** wlst, const char * word, int ns, int cpdsuggest)
@ -971,7 +917,7 @@ int SuggestMgr::longswapchar(char ** wlst, const char * word, int ns, int cpdsug
strcpy(candidate, word);
for (p = candidate; *p != 0; p++) {
for (q = candidate; *q != 0; q++) {
if (abs(p-q) > 1) {
if (abs((int)(p-q)) > 1) {
tmpc = *p;
*p = *q;
*q = tmpc;
@ -998,7 +944,7 @@ int SuggestMgr::longswapchar_utf(char ** wlst, const w_char * word, int wl, int
memcpy (candidate_utf, word, wl * sizeof(w_char));
for (p = candidate_utf; p < (candidate_utf + wl); p++) {
for (q = candidate_utf; q < (candidate_utf + wl); q++) {
if (abs(p-q) > 1) {
if (abs((int)(p-q)) > 1) {
tmpc = *p;
*p = *q;
*q = tmpc;
@ -1795,11 +1741,10 @@ int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt)
if (ns < 2) break;
}
} else {
char t[MAXSWUTF8L];
l1 = strlen(s1);
l2 = strlen(s2);
if (l2 == 0) return 0;
strcpy(t, s2);
l1 = strlen(s1);
char *t = mystrdup(s2);
if (opt & NGRAM_LOWERING) mkallsmall(t, csconv);
for (int j = 1; j <= n; j++) {
ns = 0;
@ -1812,6 +1757,7 @@ int SuggestMgr::ngram(int n, char * s1, const char * s2, int opt)
nscore = nscore + ns;
if (ns < 2) break;
}
free(t);
}
ns = 0;
@ -1836,12 +1782,13 @@ int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) {
u8_u16(su1, 1, s1);
u8_u16(su2, 1, s2);
unsigned short idx = (su2->h << 8) + su2->l;
if (*((short *)su1) != *((short *)su2) &&
(*((unsigned short *)su1) != unicodetolower(idx, langnum))) return 0;
unsigned short otheridx = (su1->h << 8) + su1->l;
if (otheridx != idx &&
(otheridx != unicodetolower(idx, langnum))) return 0;
int l1 = u8_u16(su1, MAXSWL, s1);
int l2 = u8_u16(su2, MAXSWL, s2);
for(i = 1; (i < l1) && (i < l2) &&
(*((short *)(su1 + i)) == *((short *)(su2 + i))); i++);
(su1[i].l == su2[i].l) && (su1[i].h == su2[i].h); i++);
return i;
}
} else {
@ -1856,7 +1803,7 @@ int SuggestMgr::leftcommonsubstring(char * s1, const char * s2) {
do {
s1++; s2++;
} while ((*s1 == *s2) && (*s1 != '\0'));
return s1 - olds;
return (int)(s1 - olds);
}
}
return 0;
@ -1977,8 +1924,8 @@ void SuggestMgr::lcs(const char * s, const char * s2, int * l1, int * l2, char *
for (j = 0; j <= n; j++) c[j] = 0;
for (i = 1; i <= m; i++) {
for (j = 1; j <= n; j++) {
if ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1))
|| (!utf8) && ((*(s+i-1)) == (*(s2+j-1)))) {
if ( ((utf8) && (*((short *) su+i-1) == *((short *)su2+j-1)))
|| ((!utf8) && ((*(s+i-1)) == (*(s2+j-1))))) {
c[i*(n+1) + j] = c[(i-1)*(n+1) + j-1]+1;
b[i*(n+1) + j] = LCS_UPLEFT;
} else if (c[(i-1)*(n+1) + j] >= c[i*(n+1) + j-1]) {

View File

@ -18,6 +18,7 @@
* Contributor(s): Kevin Hendricks (kevin.hendricks@sympatico.ca)
* David Einstein (deinst@world.std.com)
* László Németh (nemethl@gyorsposta.hu)
* Caolan McNamara (caolanm@redhat.com)
* Davide Prina
* Giuseppe Modugno
* Gianluca Turconi
@ -74,15 +75,17 @@
#define NGRAM_ANY_MISMATCH (1 << 1)
#define NGRAM_LOWERING (1 << 2)
#include "affixmgr.hxx"
#include "hunvisapi.h"
#include "atypes.hxx"
#include "affixmgr.hxx"
#include "hashmgr.hxx"
#include "langnum.hxx"
#include <time.h>
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
class SuggestMgr
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
{
char * ckey;
int ckeyl;
@ -146,8 +149,7 @@ private:
int movechar_utf(char **, const w_char *, int, int, int);
int mapchars(char**, const char *, int, int);
int map_related(const char *, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
int map_related_utf(w_char *, int, int, int, char ** wlst, int, const mapentry*, int, int *, clock_t *);
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
int ngram(int n, char * s1, const char * s2, int opt);
int mystrlen(const char * word);
int leftcommonsubstring(char * s1, const char * s2);
@ -160,3 +162,4 @@ private:
};
#endif