mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-28 23:31:56 +00:00
Bug 1257902 - Update Hunspell to version 1.4.0. r=ehsan
--HG-- extra : rebase_source : 44913bce7bd6e869cee9b2357af46142997feee4
This commit is contained in:
parent
cd0ac88627
commit
0e12039208
@ -1,182 +1,21 @@
|
||||
About Hunspell
|
||||
--------------
|
||||
Hunspell spell checker and morphological analyser library
|
||||
|
||||
Hunspell is a spell checker and morphological analyzer library and program
|
||||
designed for languages with rich morphology and complex word compounding or
|
||||
character encoding. Hunspell interfaces: Ispell-like terminal interface
|
||||
using Curses library, Ispell pipe interface, OpenOffice.org UNO module.
|
||||
Documentation, tests, examples: http://hunspell.github.io/
|
||||
|
||||
Hunspell's code base comes from the OpenOffice.org MySpell
|
||||
(http://lingucomponent.openoffice.org/MySpell-3.zip). See README.MYSPELL,
|
||||
AUTHORS.MYSPELL and license.myspell files.
|
||||
Hunspell is designed to eventually replace Myspell in OpenOffice.org.
|
||||
Author of Hunspell:
|
||||
László Németh (nemethl (at) gyorsposta.hu)
|
||||
|
||||
Main features of Hunspell spell checker and morphological analyzer:
|
||||
Hunspell is based on OpenOffice.org's MySpell. MySpell's author:
|
||||
Kevin Hendricks (kevin.hendricks (at) sympatico.ca)
|
||||
|
||||
- Unicode support (affix rules work only with the first 65535 Unicode characters)
|
||||
License: GPL 2.0/LGPL 2.1/MPL 1.1 tri-license
|
||||
|
||||
- Morphological analysis (in custom item and arrangement style) and stemming
|
||||
The contents of this library may be used under the terms of
|
||||
the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
the GNU Lesser General Public License Version 2.1 or later (the "LGPL",
|
||||
see http://gnu.org/copyleft/lesser.html) or the Mozilla Public License
|
||||
Version 1.1 or later (the "MPL", see http://mozilla.org/MPL/MPL-1.1.html).
|
||||
|
||||
- Max. 65535 affix classes and twofold affix stripping (for agglutinative
|
||||
languages, like Azeri, Basque, Estonian, Finnish, Hungarian, Turkish, etc.)
|
||||
|
||||
- Support complex compoundings (for example, Hungarian and German)
|
||||
|
||||
- Support language specific features (for example, special casing of
|
||||
Azeri and Turkish dotted i, or German sharp s)
|
||||
|
||||
- Handle conditional affixes, circumfixes, fogemorphemes,
|
||||
forbidden words, pseudoroots and homonyms.
|
||||
|
||||
- Free software (LGPL, GPL, MPL tri-license)
|
||||
|
||||
Compiling on Unix/Linux
|
||||
-----------------------
|
||||
|
||||
./configure
|
||||
make
|
||||
make install
|
||||
|
||||
For dictionary development, use the --with-warnings option of configure.
|
||||
|
||||
For the interactive user interface of the Hunspell executable, use the --with-ui option.
|
||||
|
||||
The developer packages you need to compile Hunspell's interface:
|
||||
|
||||
glibc-devel
|
||||
|
||||
optional developer packages:
|
||||
|
||||
ncurses (needed for --with-ui), e.g. libncursesw5 for UTF-8
|
||||
readline (for fancy input line editing,
|
||||
configure parameter: --with-readline)
|
||||
locale and gettext (but you can also use the
|
||||
--with-included-gettext configure parameter)
|
||||
|
||||
Hunspell distribution uses new Autoconf (2.59) and Automake (1.9).
|
||||
|
||||
Compiling on Windows
|
||||
--------------------
|
||||
|
||||
1. Compiling with Windows SDK
|
||||
|
||||
Download the free Windows SDK of Microsoft, open a command prompt
|
||||
window and cd into hunspell/src/win_api. Use the following command
|
||||
to compile hunspell:
|
||||
|
||||
vcbuild
|
||||
|
||||
2. Compiling in Cygwin environment
|
||||
|
||||
Download and install Cygwin environment for Windows with the following
|
||||
extra packages:
|
||||
|
||||
make
|
||||
gcc-g++ development package
|
||||
mingw development package (for cygwin.dll free native Windows compilation)
|
||||
ncurses, readline (for user interface)
|
||||
iconv (character conversion)
|
||||
|
||||
2.1. Cygwin1.dll dependent compiling
|
||||
|
||||
Open a Cygwin shell, cd into the hunspell root directory:
|
||||
|
||||
./configure
|
||||
make
|
||||
make install
|
||||
|
||||
For dictionary development, use the --with-warnings option of configure.
|
||||
|
||||
For the interactive user interface of the Hunspell executable, use the --with-ui option.
|
||||
|
||||
readline configure parameter: --with-readline (for fancy input line editing)
|
||||
|
||||
2.2. Cygwin1.dll free compiling
|
||||
|
||||
Open a Cygwin shell, cd into hunspell/src/win_api and run:
|
||||
|
||||
make -f Makefile.cygwin
|
||||
|
||||
Testing
|
||||
-------
|
||||
|
||||
Testing Hunspell (see tests in tests/ subdirectory):
|
||||
|
||||
make check
|
||||
|
||||
or with Valgrind debugger:
|
||||
|
||||
make check
|
||||
VALGRIND=[Valgrind_tool] make check
|
||||
|
||||
For example:
|
||||
|
||||
make check
|
||||
VALGRIND=memcheck make check
|
||||
|
||||
Documentation
|
||||
-------------
|
||||
|
||||
features and dictionary format:
|
||||
man 5 hunspell
|
||||
|
||||
man hunspell
|
||||
hunspell -h
|
||||
http://hunspell.sourceforge.net
|
||||
|
||||
Usage
|
||||
-----
|
||||
|
||||
The src/tools directory contains the following executables after compiling
|
||||
(or some of them are in the src/win_api):
|
||||
|
||||
affixcompress: dictionary generation from large (millions of words) vocabularies
|
||||
analyze: example of spell checking, stemming and morphological analysis
|
||||
chmorph: example of automatic morphological generation and conversion
|
||||
example: example of spell checking and suggestion
|
||||
hunspell: main program for spell checking and others (see manual)
|
||||
hunzip: decompressor of hzip format
|
||||
hzip: compressor of hzip format
|
||||
makealias: alias compression (Hunspell only, not back compatible with MySpell)
|
||||
munch: dictionary generation from vocabularies (it needs an affix file, too).
|
||||
unmunch: list all recognized words of a MySpell dictionary
|
||||
wordforms: word generation (Hunspell version of unmunch)
|
||||
|
||||
After compiling and installing (see INSTALL) you can
|
||||
run the Hunspell spell checker (compiled with user interface)
|
||||
with a Hunspell or Myspell dictionary:
|
||||
|
||||
hunspell -d en_US text.txt
|
||||
|
||||
or without interface:
|
||||
|
||||
hunspell
|
||||
hunspell -d en_GB -l <text.txt
|
||||
|
||||
Dictionaries consist of an affix and dictionary file, see tests/
|
||||
or http://wiki.services.openoffice.org/wiki/Dictionaries.
|
||||
|
||||
Using Hunspell library with GCC
|
||||
-------------------------------
|
||||
|
||||
Including in your program:
|
||||
#include <hunspell.hxx>
|
||||
|
||||
Linking with Hunspell static library:
|
||||
g++ -lhunspell example.cxx
|
||||
|
||||
Dictionaries
|
||||
------------
|
||||
|
||||
Myspell & Hunspell dictionaries:
|
||||
http://extensions.libreoffice.org
|
||||
http://cgit.freedesktop.org/libreoffice/dictionaries
|
||||
http://extensions.openoffice.org
|
||||
http://wiki.services.openoffice.org/wiki/Dictionaries
|
||||
|
||||
Aspell dictionaries (need some conversion):
|
||||
ftp://ftp.gnu.org/gnu/aspell/dict
|
||||
Conversion steps: see relevant feature request at http://hunspell.sf.net.
|
||||
|
||||
László Németh
|
||||
nemeth at numbertext org
|
||||
Software distributed under these licenses is distributed on an "AS IS" basis,
|
||||
WITHOUT WARRANTY OF ANY KIND, either express or implied. See the licenses
|
||||
for the specific language governing rights and limitations under the licenses.
|
||||
|
@ -1,2 +1,2 @@
|
||||
Hunspell Version: 1.3.3
|
||||
Hunspell Version: 1.4.0
|
||||
Additional Patches: See patches directory.
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,76 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _AFFIX_HXX_
|
||||
#define _AFFIX_HXX_
|
||||
|
||||
@ -9,136 +82,151 @@
|
||||
|
||||
/* A Prefix Entry */
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry
|
||||
{
|
||||
private:
|
||||
PfxEntry(const PfxEntry&);
|
||||
PfxEntry& operator = (const PfxEntry&);
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
class LIBHUNSPELL_DLL_EXPORTED PfxEntry : protected AffEntry {
|
||||
private:
|
||||
PfxEntry(const PfxEntry&);
|
||||
PfxEntry& operator=(const PfxEntry&);
|
||||
|
||||
PfxEntry * next;
|
||||
PfxEntry * nexteq;
|
||||
PfxEntry * nextne;
|
||||
PfxEntry * flgnxt;
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
|
||||
public:
|
||||
PfxEntry* next;
|
||||
PfxEntry* nexteq;
|
||||
PfxEntry* nextne;
|
||||
PfxEntry* flgnxt;
|
||||
|
||||
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||
public:
|
||||
PfxEntry(AffixMgr* pmgr, affentry* dp);
|
||||
~PfxEntry();
|
||||
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry * checkword(const char * word, int len, char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_morph(const char * word, int len, char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_twosfx_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
char* check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char * getKey() { return appnd; }
|
||||
char * add(const char * word, int len);
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char* getKey() { return appnd.c_str(); }
|
||||
char* add(const char* word, size_t len);
|
||||
|
||||
inline short getKeyLen() { return appndl; }
|
||||
inline short getKeyLen() { return appnd.size(); }
|
||||
|
||||
inline const char * getMorph() { return morphcode; }
|
||||
inline const char* getMorph() { return morphcode; }
|
||||
|
||||
inline const unsigned short * getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
inline const unsigned short* getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
|
||||
inline PfxEntry * getNext() { return next; }
|
||||
inline PfxEntry * getNextNE() { return nextne; }
|
||||
inline PfxEntry * getNextEQ() { return nexteq; }
|
||||
inline PfxEntry * getFlgNxt() { return flgnxt; }
|
||||
inline PfxEntry* getNext() { return next; }
|
||||
inline PfxEntry* getNextNE() { return nextne; }
|
||||
inline PfxEntry* getNextEQ() { return nexteq; }
|
||||
inline PfxEntry* getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(PfxEntry * ptr) { next = ptr; }
|
||||
inline void setNextNE(PfxEntry * ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(PfxEntry * ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(PfxEntry * ptr) { flgnxt = ptr; }
|
||||
|
||||
inline char * nextchar(char * p);
|
||||
inline int test_condition(const char * st);
|
||||
inline void setNext(PfxEntry* ptr) { next = ptr; }
|
||||
inline void setNextNE(PfxEntry* ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(PfxEntry* ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(PfxEntry* ptr) { flgnxt = ptr; }
|
||||
|
||||
inline char* nextchar(char* p);
|
||||
inline int test_condition(const char* st);
|
||||
};
|
||||
|
||||
|
||||
|
||||
|
||||
/* A Suffix Entry */
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry
|
||||
{
|
||||
private:
|
||||
SfxEntry(const SfxEntry&);
|
||||
SfxEntry& operator = (const SfxEntry&);
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
char * rappnd;
|
||||
class LIBHUNSPELL_DLL_EXPORTED SfxEntry : protected AffEntry {
|
||||
private:
|
||||
SfxEntry(const SfxEntry&);
|
||||
SfxEntry& operator=(const SfxEntry&);
|
||||
|
||||
SfxEntry * next;
|
||||
SfxEntry * nexteq;
|
||||
SfxEntry * nextne;
|
||||
SfxEntry * flgnxt;
|
||||
|
||||
SfxEntry * l_morph;
|
||||
SfxEntry * r_morph;
|
||||
SfxEntry * eq_morph;
|
||||
private:
|
||||
AffixMgr* pmyMgr;
|
||||
std::string rappnd;
|
||||
|
||||
public:
|
||||
SfxEntry* next;
|
||||
SfxEntry* nexteq;
|
||||
SfxEntry* nextne;
|
||||
SfxEntry* flgnxt;
|
||||
|
||||
SfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||
SfxEntry* l_morph;
|
||||
SfxEntry* r_morph;
|
||||
SfxEntry* eq_morph;
|
||||
|
||||
public:
|
||||
SfxEntry(AffixMgr* pmgr, affentry* dp);
|
||||
~SfxEntry();
|
||||
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry * checkword(const char * word, int len, int optflags,
|
||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
|
||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry* checkword(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
char** wlst,
|
||||
int maxSug,
|
||||
int* ns,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
const FLAG badflag = FLAG_NULL);
|
||||
|
||||
struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* check_twosfx(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_twosfx_morph(const char * word, int len, int optflags,
|
||||
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry * get_next_homonym(struct hentry * he);
|
||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
|
||||
const FLAG cclass, const FLAG needflag);
|
||||
char* check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* get_next_homonym(struct hentry* he);
|
||||
struct hentry* get_next_homonym(struct hentry* word,
|
||||
int optflags,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass,
|
||||
const FLAG needflag);
|
||||
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char* getKey() { return rappnd.c_str(); }
|
||||
char* add(const char* word, size_t len);
|
||||
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
inline const char * getKey() { return rappnd; }
|
||||
char * add(const char * word, int len);
|
||||
inline const char* getMorph() { return morphcode; }
|
||||
|
||||
inline const unsigned short* getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
inline const char* getAffix() { return appnd.c_str(); }
|
||||
|
||||
inline const char * getMorph() { return morphcode; }
|
||||
inline short getKeyLen() { return appnd.size(); }
|
||||
|
||||
inline const unsigned short * getCont() { return contclass; }
|
||||
inline short getContLen() { return contclasslen; }
|
||||
inline const char * getAffix() { return appnd; }
|
||||
inline SfxEntry* getNext() { return next; }
|
||||
inline SfxEntry* getNextNE() { return nextne; }
|
||||
inline SfxEntry* getNextEQ() { return nexteq; }
|
||||
|
||||
inline short getKeyLen() { return appndl; }
|
||||
inline SfxEntry* getLM() { return l_morph; }
|
||||
inline SfxEntry* getRM() { return r_morph; }
|
||||
inline SfxEntry* getEQM() { return eq_morph; }
|
||||
inline SfxEntry* getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline SfxEntry * getNext() { return next; }
|
||||
inline SfxEntry * getNextNE() { return nextne; }
|
||||
inline SfxEntry * getNextEQ() { return nexteq; }
|
||||
|
||||
inline SfxEntry * getLM() { return l_morph; }
|
||||
inline SfxEntry * getRM() { return r_morph; }
|
||||
inline SfxEntry * getEQM() { return eq_morph; }
|
||||
inline SfxEntry * getFlgNxt() { return flgnxt; }
|
||||
|
||||
inline void setNext(SfxEntry * ptr) { next = ptr; }
|
||||
inline void setNextNE(SfxEntry * ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(SfxEntry * ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(SfxEntry * ptr) { flgnxt = ptr; }
|
||||
|
||||
inline char * nextchar(char * p);
|
||||
inline int test_condition(const char * st, const char * begin);
|
||||
inline void setNext(SfxEntry* ptr) { next = ptr; }
|
||||
inline void setNextNE(SfxEntry* ptr) { nextne = ptr; }
|
||||
inline void setNextEQ(SfxEntry* ptr) { nexteq = ptr; }
|
||||
inline void setFlgNxt(SfxEntry* ptr) { flgnxt = ptr; }
|
||||
|
||||
inline char* nextchar(char* p);
|
||||
inline int test_condition(const char* st, const char* begin);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,76 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _AFFIXMGR_HXX_
|
||||
#define _AFFIXMGR_HXX_
|
||||
|
||||
@ -5,6 +78,8 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#include <string>
|
||||
|
||||
#include "atypes.hxx"
|
||||
#include "baseaffix.hxx"
|
||||
#include "hashmgr.hxx"
|
||||
@ -12,241 +87,304 @@
|
||||
#include "replist.hxx"
|
||||
|
||||
// check flag duplication
|
||||
#define dupSFX (1 << 0)
|
||||
#define dupPFX (1 << 1)
|
||||
#define dupSFX (1 << 0)
|
||||
#define dupPFX (1 << 1)
|
||||
|
||||
class PfxEntry;
|
||||
class SfxEntry;
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr
|
||||
{
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffixMgr {
|
||||
PfxEntry* pStart[SETSIZE];
|
||||
SfxEntry* sStart[SETSIZE];
|
||||
PfxEntry* pFlag[SETSIZE];
|
||||
SfxEntry* sFlag[SETSIZE];
|
||||
HashMgr* pHMgr;
|
||||
HashMgr** alldic;
|
||||
int* maxdic;
|
||||
char* keystring;
|
||||
char* trystring;
|
||||
char* encoding;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
FLAG compoundflag;
|
||||
FLAG compoundbegin;
|
||||
FLAG compoundmiddle;
|
||||
FLAG compoundend;
|
||||
FLAG compoundroot;
|
||||
FLAG compoundforbidflag;
|
||||
FLAG compoundpermitflag;
|
||||
int compoundmoresuffixes;
|
||||
int checkcompounddup;
|
||||
int checkcompoundrep;
|
||||
int checkcompoundcase;
|
||||
int checkcompoundtriple;
|
||||
int simplifiedtriple;
|
||||
FLAG forbiddenword;
|
||||
FLAG nosuggest;
|
||||
FLAG nongramsuggest;
|
||||
FLAG needaffix;
|
||||
int cpdmin;
|
||||
int numrep;
|
||||
replentry* reptable;
|
||||
RepList* iconvtable;
|
||||
RepList* oconvtable;
|
||||
int nummap;
|
||||
mapentry* maptable;
|
||||
int numbreak;
|
||||
char** breaktable;
|
||||
int numcheckcpd;
|
||||
patentry* checkcpdtable;
|
||||
int simplifiedcpd;
|
||||
int numdefcpd;
|
||||
flagentry* defcpdtable;
|
||||
phonetable* phone;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int maxdiff;
|
||||
int onlymaxdiff;
|
||||
int nosplitsugs;
|
||||
int sugswithdots;
|
||||
int cpdwordmax;
|
||||
int cpdmaxsyllable;
|
||||
char* cpdvowels;
|
||||
w_char* cpdvowels_utf16;
|
||||
int cpdvowels_utf16_len;
|
||||
char* cpdsyllablenum;
|
||||
const char* pfxappnd; // BUG: not stateless
|
||||
const char* sfxappnd; // BUG: not stateless
|
||||
int sfxextra; // BUG: not stateless
|
||||
FLAG sfxflag; // BUG: not stateless
|
||||
char* derived; // BUG: not stateless
|
||||
SfxEntry* sfx; // BUG: not stateless
|
||||
PfxEntry* pfx; // BUG: not stateless
|
||||
int checknum;
|
||||
char* wordchars;
|
||||
std::vector<w_char> wordchars_utf16;
|
||||
char* ignorechars;
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
char* version;
|
||||
char* lang;
|
||||
int langnum;
|
||||
FLAG lemma_present;
|
||||
FLAG circumfix;
|
||||
FLAG onlyincompound;
|
||||
FLAG keepcase;
|
||||
FLAG forceucase;
|
||||
FLAG warn;
|
||||
int forbidwarn;
|
||||
FLAG substandard;
|
||||
int checksharps;
|
||||
int fullstrip;
|
||||
|
||||
PfxEntry * pStart[SETSIZE];
|
||||
SfxEntry * sStart[SETSIZE];
|
||||
PfxEntry * pFlag[SETSIZE];
|
||||
SfxEntry * sFlag[SETSIZE];
|
||||
HashMgr * pHMgr;
|
||||
HashMgr ** alldic;
|
||||
int * maxdic;
|
||||
char * keystring;
|
||||
char * trystring;
|
||||
char * encoding;
|
||||
struct cs_info * csconv;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
FLAG compoundflag;
|
||||
FLAG compoundbegin;
|
||||
FLAG compoundmiddle;
|
||||
FLAG compoundend;
|
||||
FLAG compoundroot;
|
||||
FLAG compoundforbidflag;
|
||||
FLAG compoundpermitflag;
|
||||
int compoundmoresuffixes;
|
||||
int checkcompounddup;
|
||||
int checkcompoundrep;
|
||||
int checkcompoundcase;
|
||||
int checkcompoundtriple;
|
||||
int simplifiedtriple;
|
||||
FLAG forbiddenword;
|
||||
FLAG nosuggest;
|
||||
FLAG nongramsuggest;
|
||||
FLAG needaffix;
|
||||
int cpdmin;
|
||||
int numrep;
|
||||
replentry * reptable;
|
||||
RepList * iconvtable;
|
||||
RepList * oconvtable;
|
||||
int nummap;
|
||||
mapentry * maptable;
|
||||
int numbreak;
|
||||
char ** breaktable;
|
||||
int numcheckcpd;
|
||||
patentry * checkcpdtable;
|
||||
int simplifiedcpd;
|
||||
int numdefcpd;
|
||||
flagentry * defcpdtable;
|
||||
phonetable * phone;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int maxdiff;
|
||||
int onlymaxdiff;
|
||||
int nosplitsugs;
|
||||
int sugswithdots;
|
||||
int cpdwordmax;
|
||||
int cpdmaxsyllable;
|
||||
char * cpdvowels;
|
||||
w_char * cpdvowels_utf16;
|
||||
int cpdvowels_utf16_len;
|
||||
char * cpdsyllablenum;
|
||||
const char * pfxappnd; // BUG: not stateless
|
||||
const char * sfxappnd; // BUG: not stateless
|
||||
FLAG sfxflag; // BUG: not stateless
|
||||
char * derived; // BUG: not stateless
|
||||
SfxEntry * sfx; // BUG: not stateless
|
||||
PfxEntry * pfx; // BUG: not stateless
|
||||
int checknum;
|
||||
char * wordchars;
|
||||
unsigned short * wordchars_utf16;
|
||||
int wordchars_utf16_len;
|
||||
char * ignorechars;
|
||||
unsigned short * ignorechars_utf16;
|
||||
int ignorechars_utf16_len;
|
||||
char * version;
|
||||
char * lang;
|
||||
int langnum;
|
||||
FLAG lemma_present;
|
||||
FLAG circumfix;
|
||||
FLAG onlyincompound;
|
||||
FLAG keepcase;
|
||||
FLAG forceucase;
|
||||
FLAG warn;
|
||||
int forbidwarn;
|
||||
FLAG substandard;
|
||||
int checksharps;
|
||||
int fullstrip;
|
||||
int havecontclass; // boolean variable
|
||||
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold
|
||||
// affix)
|
||||
|
||||
int havecontclass; // boolean variable
|
||||
char contclasses[CONTSIZE]; // flags of possible continuing classes (twofold affix)
|
||||
|
||||
public:
|
||||
|
||||
AffixMgr(const char * affpath, HashMgr** ptr, int * md,
|
||||
const char * key = NULL);
|
||||
public:
|
||||
AffixMgr(const char* affpath, HashMgr** ptr, int* md, const char* key = NULL);
|
||||
~AffixMgr();
|
||||
struct hentry * affix_check(const char * word, int len,
|
||||
const unsigned short needflag = (unsigned short) 0,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry * prefix_check(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
inline int isSubset(const char * s1, const char * s2);
|
||||
struct hentry * prefix_check_twosfx(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
inline int isRevSubset(const char * s1, const char * end_of_s2, int len);
|
||||
struct hentry * suffix_check(const char * word, int len, int sfxopts,
|
||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry * suffix_check_twosfx(const char * word, int len,
|
||||
int sfxopts, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry* affix_check(const char* word,
|
||||
int len,
|
||||
const unsigned short needflag = (unsigned short)0,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry* prefix_check(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
inline int isSubset(const char* s1, const char* s2);
|
||||
struct hentry* prefix_check_twosfx(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
inline int isRevSubset(const char* s1, const char* end_of_s2, int len);
|
||||
struct hentry* suffix_check(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
char** wlst,
|
||||
int maxSug,
|
||||
int* ns,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
struct hentry* suffix_check_twosfx(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * affix_check_morph(const char * word, int len,
|
||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
||||
char * prefix_check_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
char * suffix_check_morph (const char * word, int len, int sfxopts,
|
||||
PfxEntry * ppfx, const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL, char in_compound = IN_CPD_NOT);
|
||||
char* affix_check_morph(const char* word,
|
||||
int len,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
char* prefix_check_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* suffix_check_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG cclass = FLAG_NULL,
|
||||
const FLAG needflag = FLAG_NULL,
|
||||
char in_compound = IN_CPD_NOT);
|
||||
|
||||
char * prefix_check_twosfx_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
char * suffix_check_twosfx_morph(const char * word, int len,
|
||||
int sfxopts, PfxEntry * ppfx, const FLAG needflag = FLAG_NULL);
|
||||
char* prefix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
char* suffix_check_twosfx_morph(const char* word,
|
||||
int len,
|
||||
int sfxopts,
|
||||
PfxEntry* ppfx,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * morphgen(char * ts, int wl, const unsigned short * ap,
|
||||
unsigned short al, char * morph, char * targetmorph, int level);
|
||||
char* morphgen(const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* morph,
|
||||
const char* targetmorph,
|
||||
int level);
|
||||
|
||||
int expand_rootword(struct guessword * wlst, int maxn, const char * ts,
|
||||
int wl, const unsigned short * ap, unsigned short al, char * bad,
|
||||
int, char *);
|
||||
int expand_rootword(struct guessword* wlst,
|
||||
int maxn,
|
||||
const char* ts,
|
||||
int wl,
|
||||
const unsigned short* ap,
|
||||
unsigned short al,
|
||||
const char* bad,
|
||||
int,
|
||||
const char*);
|
||||
|
||||
short get_syllable (const char * word, int wlen);
|
||||
int cpdrep_check(const char * word, int len);
|
||||
int cpdpat_check(const char * word, int len, hentry * r1, hentry * r2,
|
||||
const char affixed);
|
||||
int defcpd_check(hentry *** words, short wnum, hentry * rv,
|
||||
hentry ** rwords, char all);
|
||||
int cpdcase_check(const char * word, int len);
|
||||
inline int candidate_check(const char * word, int len);
|
||||
void setcminmax(int * cmin, int * cmax, const char * word, int len);
|
||||
struct hentry * compound_check(const char * word, int len, short wordnum,
|
||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
||||
char hu_mov_rule, char is_sug, int * info);
|
||||
short get_syllable(const std::string& word);
|
||||
int cpdrep_check(const char* word, int len);
|
||||
int cpdpat_check(const char* word,
|
||||
int len,
|
||||
hentry* r1,
|
||||
hentry* r2,
|
||||
const char affixed);
|
||||
int defcpd_check(hentry*** words,
|
||||
short wnum,
|
||||
hentry* rv,
|
||||
hentry** rwords,
|
||||
char all);
|
||||
int cpdcase_check(const char* word, int len);
|
||||
inline int candidate_check(const char* word, int len);
|
||||
void setcminmax(int* cmin, int* cmax, const char* word, int len);
|
||||
struct hentry* compound_check(const char* word,
|
||||
int len,
|
||||
short wordnum,
|
||||
short numsyllable,
|
||||
short maxwordnum,
|
||||
short wnum,
|
||||
hentry** words,
|
||||
hentry** rwords,
|
||||
char hu_mov_rule,
|
||||
char is_sug,
|
||||
int* info);
|
||||
|
||||
int compound_check_morph(const char * word, int len, short wordnum,
|
||||
short numsyllable, short maxwordnum, short wnum, hentry ** words,
|
||||
char hu_mov_rule, char ** result, char * partresult);
|
||||
int compound_check_morph(const char* word,
|
||||
int len,
|
||||
short wordnum,
|
||||
short numsyllable,
|
||||
short maxwordnum,
|
||||
short wnum,
|
||||
hentry** words,
|
||||
hentry** rwords,
|
||||
char hu_mov_rule,
|
||||
char** result,
|
||||
char* partresult);
|
||||
|
||||
struct hentry * lookup(const char * word);
|
||||
int get_numrep() const;
|
||||
struct replentry * get_reptable() const;
|
||||
RepList * get_iconvtable() const;
|
||||
RepList * get_oconvtable() const;
|
||||
struct phonetable * get_phonetable() const;
|
||||
int get_nummap() const;
|
||||
struct mapentry * get_maptable() const;
|
||||
int get_numbreak() const;
|
||||
char ** get_breaktable() const;
|
||||
char * get_encoding();
|
||||
int get_langnum() const;
|
||||
char * get_key_string();
|
||||
char * get_try_string() const;
|
||||
const char * get_wordchars() const;
|
||||
unsigned short * get_wordchars_utf16(int * len) const;
|
||||
char * get_ignore() const;
|
||||
unsigned short * get_ignore_utf16(int * len) const;
|
||||
int get_compound() const;
|
||||
FLAG get_compoundflag() const;
|
||||
FLAG get_compoundbegin() const;
|
||||
FLAG get_forbiddenword() const;
|
||||
FLAG get_nosuggest() const;
|
||||
FLAG get_nongramsuggest() const;
|
||||
FLAG get_needaffix() const;
|
||||
FLAG get_onlyincompound() const;
|
||||
FLAG get_compoundroot() const;
|
||||
FLAG get_lemma_present() const;
|
||||
int get_checknum() const;
|
||||
const char * get_prefix() const;
|
||||
const char * get_suffix() const;
|
||||
const char * get_derived() const;
|
||||
const char * get_version() const;
|
||||
int have_contclass() const;
|
||||
int get_utf8() const;
|
||||
int get_complexprefixes() const;
|
||||
char * get_suffixed(char ) const;
|
||||
int get_maxngramsugs() const;
|
||||
int get_maxcpdsugs() const;
|
||||
int get_maxdiff() const;
|
||||
int get_onlymaxdiff() const;
|
||||
int get_nosplitsugs() const;
|
||||
int get_sugswithdots(void) const;
|
||||
FLAG get_keepcase(void) const;
|
||||
FLAG get_forceucase(void) const;
|
||||
FLAG get_warn(void) const;
|
||||
int get_forbidwarn(void) const;
|
||||
int get_checksharps(void) const;
|
||||
char * encode_flag(unsigned short aflag) const;
|
||||
int get_fullstrip() const;
|
||||
int get_suffix_words(short unsigned* suff,
|
||||
int len,
|
||||
const char* root_word,
|
||||
char** slst);
|
||||
|
||||
private:
|
||||
int parse_file(const char * affpath, const char * key);
|
||||
int parse_flag(char * line, unsigned short * out, FileMgr * af);
|
||||
int parse_num(char * line, int * out, FileMgr * af);
|
||||
int parse_cpdsyllable(char * line, FileMgr * af);
|
||||
int parse_reptable(char * line, FileMgr * af);
|
||||
int parse_convtable(char * line, FileMgr * af, RepList ** rl, const char * keyword);
|
||||
int parse_phonetable(char * line, FileMgr * af);
|
||||
int parse_maptable(char * line, FileMgr * af);
|
||||
int parse_breaktable(char * line, FileMgr * af);
|
||||
int parse_checkcpdtable(char * line, FileMgr * af);
|
||||
int parse_defcpdtable(char * line, FileMgr * af);
|
||||
int parse_affix(char * line, const char at, FileMgr * af, char * dupflags);
|
||||
struct hentry* lookup(const char* word);
|
||||
int get_numrep() const;
|
||||
struct replentry* get_reptable() const;
|
||||
RepList* get_iconvtable() const;
|
||||
RepList* get_oconvtable() const;
|
||||
struct phonetable* get_phonetable() const;
|
||||
int get_nummap() const;
|
||||
struct mapentry* get_maptable() const;
|
||||
int get_numbreak() const;
|
||||
char** get_breaktable() const;
|
||||
char* get_encoding();
|
||||
int get_langnum() const;
|
||||
char* get_key_string();
|
||||
char* get_try_string() const;
|
||||
const char* get_wordchars() const;
|
||||
const std::vector<w_char>& get_wordchars_utf16() const;
|
||||
char* get_ignore() const;
|
||||
const std::vector<w_char>& get_ignore_utf16() const;
|
||||
int get_compound() const;
|
||||
FLAG get_compoundflag() const;
|
||||
FLAG get_compoundbegin() const;
|
||||
FLAG get_forbiddenword() const;
|
||||
FLAG get_nosuggest() const;
|
||||
FLAG get_nongramsuggest() const;
|
||||
FLAG get_needaffix() const;
|
||||
FLAG get_onlyincompound() const;
|
||||
FLAG get_compoundroot() const;
|
||||
FLAG get_lemma_present() const;
|
||||
int get_checknum() const;
|
||||
const char* get_prefix() const;
|
||||
const char* get_suffix() const;
|
||||
const char* get_derived() const;
|
||||
const char* get_version() const;
|
||||
int have_contclass() const;
|
||||
int get_utf8() const;
|
||||
int get_complexprefixes() const;
|
||||
char* get_suffixed(char) const;
|
||||
int get_maxngramsugs() const;
|
||||
int get_maxcpdsugs() const;
|
||||
int get_maxdiff() const;
|
||||
int get_onlymaxdiff() const;
|
||||
int get_nosplitsugs() const;
|
||||
int get_sugswithdots(void) const;
|
||||
FLAG get_keepcase(void) const;
|
||||
FLAG get_forceucase(void) const;
|
||||
FLAG get_warn(void) const;
|
||||
int get_forbidwarn(void) const;
|
||||
int get_checksharps(void) const;
|
||||
char* encode_flag(unsigned short aflag) const;
|
||||
int get_fullstrip() const;
|
||||
|
||||
void reverse_condition(char *);
|
||||
void debugflag(char * result, unsigned short flag);
|
||||
int condlen(char *);
|
||||
int encodeit(affentry &entry, char * cs);
|
||||
private:
|
||||
int parse_file(const char* affpath, const char* key);
|
||||
int parse_flag(char* line, unsigned short* out, FileMgr* af);
|
||||
int parse_num(char* line, int* out, FileMgr* af);
|
||||
int parse_cpdsyllable(char* line, FileMgr* af);
|
||||
int parse_reptable(char* line, FileMgr* af);
|
||||
int parse_convtable(char* line,
|
||||
FileMgr* af,
|
||||
RepList** rl,
|
||||
const char* keyword);
|
||||
int parse_phonetable(char* line, FileMgr* af);
|
||||
int parse_maptable(char* line, FileMgr* af);
|
||||
int parse_breaktable(char* line, FileMgr* af);
|
||||
int parse_checkcpdtable(char* line, FileMgr* af);
|
||||
int parse_defcpdtable(char* line, FileMgr* af);
|
||||
int parse_affix(char* line, const char at, FileMgr* af, char* dupflags);
|
||||
|
||||
void reverse_condition(std::string&);
|
||||
void debugflag(char* result, unsigned short flag);
|
||||
std::string& debugflag(std::string& result, unsigned short flag);
|
||||
int condlen(const char*);
|
||||
int encodeit(affentry& entry, const char* cs);
|
||||
int build_pfxtree(PfxEntry* pfxptr);
|
||||
int build_sfxtree(SfxEntry* sfxptr);
|
||||
int process_pfx_order();
|
||||
int process_sfx_order();
|
||||
PfxEntry * process_pfx_in_order(PfxEntry * ptr, PfxEntry * nptr);
|
||||
SfxEntry * process_sfx_in_order(SfxEntry * ptr, SfxEntry * nptr);
|
||||
PfxEntry* process_pfx_in_order(PfxEntry* ptr, PfxEntry* nptr);
|
||||
SfxEntry* process_sfx_in_order(SfxEntry* ptr, SfxEntry* nptr);
|
||||
int process_pfx_tree_to_list();
|
||||
int process_sfx_tree_to_list();
|
||||
int redundant_condition(char, char * strip, int stripl,
|
||||
const char * cond, int);
|
||||
void finishFileMgr(FileMgr *afflst);
|
||||
int redundant_condition(char, const char* strip, int stripl, const char* cond, int);
|
||||
void finishFileMgr(FileMgr* afflst);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,3 +1,43 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _ATYPES_HXX_
|
||||
#define _ATYPES_HXX_
|
||||
|
||||
@ -6,8 +46,9 @@
|
||||
#ifdef HUNSPELL_WARNING_ON
|
||||
#define HUNSPELL_WARNING fprintf
|
||||
#else
|
||||
// empty inline function to switch off warnings (instead of the C99 standard variadic macros)
|
||||
static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
||||
// empty inline function to switch off warnings (instead of the C99 standard
|
||||
// variadic macros)
|
||||
static inline void HUNSPELL_WARNING(FILE*, const char*, ...) {}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
@ -16,90 +57,87 @@ static inline void HUNSPELL_WARNING(FILE *, const char *, ...) {}
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "w_char.hxx"
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
|
||||
#define SETSIZE 256
|
||||
#define CONTSIZE 65536
|
||||
#define MAXWORDLEN 100
|
||||
#define MAXWORDUTF8LEN 256
|
||||
#define SETSIZE 256
|
||||
#define CONTSIZE 65536
|
||||
|
||||
// affentry options
|
||||
#define aeXPRODUCT (1 << 0)
|
||||
#define aeUTF8 (1 << 1)
|
||||
#define aeALIASF (1 << 2)
|
||||
#define aeALIASM (1 << 3)
|
||||
#define aeLONGCOND (1 << 4)
|
||||
#define aeXPRODUCT (1 << 0)
|
||||
#define aeUTF8 (1 << 1)
|
||||
#define aeALIASF (1 << 2)
|
||||
#define aeALIASM (1 << 3)
|
||||
#define aeLONGCOND (1 << 4)
|
||||
|
||||
// compound options
|
||||
#define IN_CPD_NOT 0
|
||||
#define IN_CPD_NOT 0
|
||||
#define IN_CPD_BEGIN 1
|
||||
#define IN_CPD_END 2
|
||||
#define IN_CPD_END 2
|
||||
#define IN_CPD_OTHER 3
|
||||
|
||||
// info options
|
||||
#define SPELL_COMPOUND (1 << 0)
|
||||
#define SPELL_FORBIDDEN (1 << 1)
|
||||
#define SPELL_ALLCAP (1 << 2)
|
||||
#define SPELL_NOCAP (1 << 3)
|
||||
#define SPELL_INITCAP (1 << 4)
|
||||
#define SPELL_ORIGCAP (1 << 5)
|
||||
#define SPELL_WARN (1 << 6)
|
||||
#define SPELL_COMPOUND (1 << 0)
|
||||
#define SPELL_FORBIDDEN (1 << 1)
|
||||
#define SPELL_ALLCAP (1 << 2)
|
||||
#define SPELL_NOCAP (1 << 3)
|
||||
#define SPELL_INITCAP (1 << 4)
|
||||
#define SPELL_ORIGCAP (1 << 5)
|
||||
#define SPELL_WARN (1 << 6)
|
||||
|
||||
#define MAXLNLEN 8192
|
||||
#define MAXLNLEN 8192
|
||||
|
||||
#define MINCPDLEN 3
|
||||
#define MAXCOMPOUND 10
|
||||
#define MAXCONDLEN 20
|
||||
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char *))
|
||||
#define MINCPDLEN 3
|
||||
#define MAXCOMPOUND 10
|
||||
#define MAXCONDLEN 20
|
||||
#define MAXCONDLEN_1 (MAXCONDLEN - sizeof(char*))
|
||||
|
||||
#define MAXACC 1000
|
||||
#define MAXACC 1000
|
||||
|
||||
#define FLAG unsigned short
|
||||
#define FLAG_NULL 0x00
|
||||
#define FREE_FLAG(a) a = 0
|
||||
|
||||
#define TESTAFF( a, b , c ) (flag_bsearch((unsigned short *) a, (unsigned short) b, c))
|
||||
#define TESTAFF(a, b, c) (std::binary_search(a, a + c, b))
|
||||
|
||||
struct affentry
|
||||
{
|
||||
char * strip;
|
||||
char * appnd;
|
||||
unsigned char stripl;
|
||||
unsigned char appndl;
|
||||
char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
unsigned short * contclass;
|
||||
short contclasslen;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char * conds2;
|
||||
} l;
|
||||
} c;
|
||||
char * morphcode;
|
||||
struct affentry {
|
||||
std::string strip;
|
||||
std::string appnd;
|
||||
char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
unsigned short* contclass;
|
||||
short contclasslen;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char* conds2;
|
||||
} l;
|
||||
} c;
|
||||
char* morphcode;
|
||||
};
|
||||
|
||||
struct guessword {
|
||||
char * word;
|
||||
char* word;
|
||||
bool allow;
|
||||
char * orig;
|
||||
char* orig;
|
||||
};
|
||||
|
||||
struct mapentry {
|
||||
char ** set;
|
||||
char** set;
|
||||
int len;
|
||||
};
|
||||
|
||||
struct flagentry {
|
||||
FLAG * def;
|
||||
FLAG* def;
|
||||
int len;
|
||||
};
|
||||
|
||||
struct patentry {
|
||||
char * pattern;
|
||||
char * pattern2;
|
||||
char * pattern3;
|
||||
char* pattern;
|
||||
char* pattern2;
|
||||
char* pattern3;
|
||||
FLAG cond;
|
||||
FLAG cond2;
|
||||
};
|
||||
|
@ -1,32 +1,77 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _BASEAFF_HXX_
|
||||
#define _BASEAFF_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
#include <string>
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry
|
||||
{
|
||||
private:
|
||||
AffEntry(const AffEntry&);
|
||||
AffEntry& operator = (const AffEntry&);
|
||||
protected:
|
||||
AffEntry() {}
|
||||
char * appnd;
|
||||
char * strip;
|
||||
unsigned char appndl;
|
||||
unsigned char stripl;
|
||||
char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char * conds2;
|
||||
} l;
|
||||
} c;
|
||||
char * morphcode;
|
||||
unsigned short * contclass;
|
||||
short contclasslen;
|
||||
class LIBHUNSPELL_DLL_EXPORTED AffEntry {
|
||||
private:
|
||||
AffEntry(const AffEntry&);
|
||||
AffEntry& operator=(const AffEntry&);
|
||||
|
||||
protected:
|
||||
AffEntry()
|
||||
: numconds(0),
|
||||
opts(0),
|
||||
aflag(0),
|
||||
morphcode(0),
|
||||
contclass(NULL),
|
||||
contclasslen(0) {}
|
||||
std::string appnd;
|
||||
std::string strip;
|
||||
unsigned char numconds;
|
||||
char opts;
|
||||
unsigned short aflag;
|
||||
union {
|
||||
char conds[MAXCONDLEN];
|
||||
struct {
|
||||
char conds1[MAXCONDLEN_1];
|
||||
char* conds2;
|
||||
} l;
|
||||
} c;
|
||||
char* morphcode;
|
||||
unsigned short* contclass;
|
||||
short contclasslen;
|
||||
};
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,76 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef __CSUTILHXX__
|
||||
#define __CSUTILHXX__
|
||||
|
||||
@ -5,112 +78,110 @@
|
||||
|
||||
// First some base level utility routines
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <string.h>
|
||||
#include "w_char.hxx"
|
||||
#include "htypes.hxx"
|
||||
|
||||
#ifdef MOZILLA_CLIENT
|
||||
#include "nscore.h" // for mozalloc headers
|
||||
#include "nscore.h" // for mozalloc headers
|
||||
#endif
|
||||
|
||||
// casing
|
||||
#define NOCAP 0
|
||||
#define NOCAP 0
|
||||
#define INITCAP 1
|
||||
#define ALLCAP 2
|
||||
#define HUHCAP 3
|
||||
#define HUHINITCAP 4
|
||||
#define ALLCAP 2
|
||||
#define HUHCAP 3
|
||||
#define HUHINITCAP 4
|
||||
|
||||
// default encoding and keystring
|
||||
#define SPELL_ENCODING "ISO8859-1"
|
||||
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
||||
#define SPELL_ENCODING "ISO8859-1"
|
||||
#define SPELL_KEYSTRING "qwertyuiop|asdfghjkl|zxcvbnm"
|
||||
|
||||
// default morphological fields
|
||||
#define MORPH_STEM "st:"
|
||||
#define MORPH_ALLOMORPH "al:"
|
||||
#define MORPH_POS "po:"
|
||||
#define MORPH_DERI_PFX "dp:"
|
||||
#define MORPH_INFL_PFX "ip:"
|
||||
#define MORPH_TERM_PFX "tp:"
|
||||
#define MORPH_DERI_SFX "ds:"
|
||||
#define MORPH_INFL_SFX "is:"
|
||||
#define MORPH_TERM_SFX "ts:"
|
||||
#define MORPH_SURF_PFX "sp:"
|
||||
#define MORPH_FREQ "fr:"
|
||||
#define MORPH_PHON "ph:"
|
||||
#define MORPH_HYPH "hy:"
|
||||
#define MORPH_PART "pa:"
|
||||
#define MORPH_FLAG "fl:"
|
||||
#define MORPH_HENTRY "_H:"
|
||||
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
||||
#define MORPH_STEM "st:"
|
||||
#define MORPH_ALLOMORPH "al:"
|
||||
#define MORPH_POS "po:"
|
||||
#define MORPH_DERI_PFX "dp:"
|
||||
#define MORPH_INFL_PFX "ip:"
|
||||
#define MORPH_TERM_PFX "tp:"
|
||||
#define MORPH_DERI_SFX "ds:"
|
||||
#define MORPH_INFL_SFX "is:"
|
||||
#define MORPH_TERM_SFX "ts:"
|
||||
#define MORPH_SURF_PFX "sp:"
|
||||
#define MORPH_FREQ "fr:"
|
||||
#define MORPH_PHON "ph:"
|
||||
#define MORPH_HYPH "hy:"
|
||||
#define MORPH_PART "pa:"
|
||||
#define MORPH_FLAG "fl:"
|
||||
#define MORPH_HENTRY "_H:"
|
||||
#define MORPH_TAG_LEN strlen(MORPH_STEM)
|
||||
|
||||
#define MSEP_FLD ' '
|
||||
#define MSEP_REC '\n'
|
||||
#define MSEP_ALT '\v'
|
||||
|
||||
// default flags
|
||||
#define DEFAULTFLAGS 65510
|
||||
#define FORBIDDENWORD 65510
|
||||
#define DEFAULTFLAGS 65510
|
||||
#define FORBIDDENWORD 65510
|
||||
#define ONLYUPCASEFLAG 65511
|
||||
|
||||
// fopen or optional _wfopen to fix long pathname problem of WIN32
|
||||
LIBHUNSPELL_DLL_EXPORTED FILE * myfopen(const char * path, const char * mode);
|
||||
LIBHUNSPELL_DLL_EXPORTED FILE* myfopen(const char* path, const char* mode);
|
||||
|
||||
// convert UTF-16 characters to UTF-8
|
||||
LIBHUNSPELL_DLL_EXPORTED char * u16_u8(char * dest, int size, const w_char * src, int srclen);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& u16_u8(std::string& dest,
|
||||
const std::vector<w_char>& src);
|
||||
|
||||
// convert UTF-8 characters to UTF-16
|
||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(w_char * dest, int size, const char * src);
|
||||
|
||||
// sort 2-byte vector
|
||||
LIBHUNSPELL_DLL_EXPORTED void flag_qsort(unsigned short flags[], int begin, int end);
|
||||
|
||||
// binary search in 2-byte vector
|
||||
LIBHUNSPELL_DLL_EXPORTED int flag_bsearch(unsigned short flags[], unsigned short flag, int right);
|
||||
LIBHUNSPELL_DLL_EXPORTED int u8_u16(std::vector<w_char>& dest,
|
||||
const std::string& src);
|
||||
|
||||
// remove end of line char(s)
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED void mychomp(char* s);
|
||||
|
||||
// duplicate string
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrdup(const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrdup(const char* s);
|
||||
|
||||
// strcat for limited length destination string
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrcat(char * dest, const char * st, int max);
|
||||
|
||||
// duplicate reverse of string
|
||||
LIBHUNSPELL_DLL_EXPORTED char * myrevstrdup(const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrcat(char* dest, const char* st, int max);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep(char ** sptr, const char delim);
|
||||
// parse into tokens with char delimiter
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrsep2(char ** sptr, const char delim);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrsep(char** sptr, const char delim);
|
||||
|
||||
// parse into tokens with char delimiter
|
||||
LIBHUNSPELL_DLL_EXPORTED char * mystrrep(char *, const char *, const char *);
|
||||
// replace pat by rep in word and return word
|
||||
LIBHUNSPELL_DLL_EXPORTED char* mystrrep(char* word,
|
||||
const char* pat,
|
||||
const char* rep);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mystrrep(std::string& str,
|
||||
const std::string& search,
|
||||
const std::string& replace);
|
||||
|
||||
// append s to the end of every line in text
|
||||
LIBHUNSPELL_DLL_EXPORTED void strlinecat(char * lines, const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& strlinecat(std::string& str,
|
||||
const std::string& apd);
|
||||
|
||||
// tokenize into lines with new line
|
||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char * text, char *** lines, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED int line_tok(const char* text,
|
||||
char*** lines,
|
||||
char breakchar);
|
||||
|
||||
// tokenize into lines with new line and uniq in place
|
||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq(char * text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * line_uniq_app(char ** text, char breakchar);
|
||||
|
||||
// change oldchar to newchar in place
|
||||
LIBHUNSPELL_DLL_EXPORTED char * tr(char * text, char oldc, char newc);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* line_uniq(char* text, char breakchar);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* line_uniq_app(char** text, char breakchar);
|
||||
|
||||
// reverse word
|
||||
LIBHUNSPELL_DLL_EXPORTED int reverseword(char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword(std::string& word);
|
||||
|
||||
// reverse word
|
||||
LIBHUNSPELL_DLL_EXPORTED int reverseword_utf(char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t reverseword_utf(std::string&);
|
||||
|
||||
// remove duplicates
|
||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char ** list, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int uniqlist(char** list, int n);
|
||||
|
||||
// free character array list
|
||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char *** list, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED void freelist(char*** list, int n);
|
||||
|
||||
// character encoding information
|
||||
struct cs_info {
|
||||
@ -121,103 +192,134 @@ struct cs_info {
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int initialize_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED void free_utf_tbl();
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetoupper(unsigned short c,
|
||||
int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED w_char upper_utf(w_char u, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED w_char lower_utf(w_char u, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED unsigned short unicodetolower(unsigned short c,
|
||||
int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int unicodeisalpha(unsigned short c);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info * get_current_cs(const char * es);
|
||||
LIBHUNSPELL_DLL_EXPORTED struct cs_info* get_current_cs(const char* es);
|
||||
|
||||
// get language identifiers of language codes
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char * lang);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_lang_num(const char* lang);
|
||||
|
||||
// get characters of the given 8bit encoding with lower- and uppercase forms
|
||||
LIBHUNSPELL_DLL_EXPORTED char * get_casechars(const char * enc);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* get_casechars(const char* enc);
|
||||
|
||||
// convert null terminated string to all caps using encoding
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkallcap(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to all little using encoding
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkallsmall(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to have initial capital using encoding
|
||||
LIBHUNSPELL_DLL_EXPORTED void enmkinitcap(char * d, const char * p, const char * encoding);
|
||||
|
||||
// convert null terminated string to all caps
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap(char * p, const struct cs_info * csconv);
|
||||
// convert std::string to all caps
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkallcap(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert string to all lowercase
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall(char * p, const struct cs_info * csconv);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkallsmall(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert null terminated string to have initial capital
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkinitcap(char * p, const struct cs_info * csconv);
|
||||
// convert first letter of string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitsmall(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert first nc characters of UTF-8 string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallsmall_utf(w_char * u, int nc, int langnum);
|
||||
// convert first letter of string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string& mkinitcap(std::string& s,
|
||||
const struct cs_info* csconv);
|
||||
|
||||
// convert first nc characters of UTF-8 string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED void mkallcap_utf(w_char * u, int nc, int langnum);
|
||||
// convert first letter of UTF-8 string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkinitcap_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert UTF-8 string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkallsmall_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert first letter of UTF-8 string to little
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkinitsmall_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// convert UTF-8 string to capital
|
||||
LIBHUNSPELL_DLL_EXPORTED std::vector<w_char>&
|
||||
mkallcap_utf(std::vector<w_char>& u, int langnum);
|
||||
|
||||
// get type of capitalization
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(char * q, int nl, cs_info *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype(const std::string& q, cs_info*);
|
||||
|
||||
// get type of capitalization (UTF-8)
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(w_char * q, int nl, int langnum);
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_captype_utf8(const std::vector<w_char>& q, int langnum);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars_utf(char * word, unsigned short ignored_chars[], int ignored_len);
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars_utf(
|
||||
std::string& word,
|
||||
const std::vector<w_char>& ignored_chars);
|
||||
|
||||
// strip all ignored characters in the string
|
||||
LIBHUNSPELL_DLL_EXPORTED void remove_ignored_chars(char * word, char * ignored_chars);
|
||||
LIBHUNSPELL_DLL_EXPORTED size_t remove_ignored_chars(
|
||||
std::string& word,
|
||||
const std::string& ignored_chars);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char * line, char ** out, int ln);
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_string(char* line, char** out, int ln);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int parse_array(char * line, char ** out, unsigned short ** out_utf16,
|
||||
int * out_utf16_len, int utf8, int ln);
|
||||
LIBHUNSPELL_DLL_EXPORTED bool parse_array(char* line,
|
||||
char** out,
|
||||
std::vector<w_char>& out_utf16,
|
||||
int utf8,
|
||||
int ln);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char * r);
|
||||
LIBHUNSPELL_DLL_EXPORTED char * copy_field(char * dest, const char * morph, const char * var);
|
||||
LIBHUNSPELL_DLL_EXPORTED int fieldlen(const char* r);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char * s, const char * t);
|
||||
LIBHUNSPELL_DLL_EXPORTED bool copy_field(std::string& dest,
|
||||
const std::string& morph,
|
||||
const std::string& var);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char * morph);
|
||||
LIBHUNSPELL_DLL_EXPORTED int morphcmp(const char* s, const char* t);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int get_sfxcount(const char* morph);
|
||||
|
||||
// conversion function for protected memory
|
||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char * dest, char * source);
|
||||
LIBHUNSPELL_DLL_EXPORTED void store_pointer(char* dest, char* source);
|
||||
|
||||
// conversion function for protected memory
|
||||
LIBHUNSPELL_DLL_EXPORTED char * get_stored_pointer(const char * s);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* get_stored_pointer(const char* s);
|
||||
|
||||
// hash entry macros
|
||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry *h)
|
||||
{
|
||||
char *ret;
|
||||
if (!h->var)
|
||||
ret = NULL;
|
||||
else if (h->var & H_OPT_ALIASM)
|
||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||
else
|
||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||
return ret;
|
||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_DATA(struct hentry* h) {
|
||||
char* ret;
|
||||
if (!h->var)
|
||||
ret = NULL;
|
||||
else if (h->var & H_OPT_ALIASM)
|
||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||
else
|
||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA(
|
||||
const struct hentry* h) {
|
||||
const char* ret;
|
||||
if (!h->var)
|
||||
ret = NULL;
|
||||
else if (h->var & H_OPT_ALIASM)
|
||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||
else
|
||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
// NULL-free version for warning-free OOo build
|
||||
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(const struct hentry *h)
|
||||
{
|
||||
const char *ret;
|
||||
if (!h->var)
|
||||
ret = "";
|
||||
else if (h->var & H_OPT_ALIASM)
|
||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||
else
|
||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||
return ret;
|
||||
LIBHUNSPELL_DLL_EXPORTED inline const char* HENTRY_DATA2(
|
||||
const struct hentry* h) {
|
||||
const char* ret;
|
||||
if (!h->var)
|
||||
ret = "";
|
||||
else if (h->var & H_OPT_ALIASM)
|
||||
ret = get_stored_pointer(HENTRY_WORD(h) + h->blen + 1);
|
||||
else
|
||||
ret = HENTRY_WORD(h) + h->blen + 1;
|
||||
return ret;
|
||||
}
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry *h, const char *p)
|
||||
{
|
||||
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
|
||||
LIBHUNSPELL_DLL_EXPORTED inline char* HENTRY_FIND(struct hentry* h,
|
||||
const char* p) {
|
||||
return (HENTRY_DATA(h) ? strstr(HENTRY_DATA(h), p) : NULL);
|
||||
}
|
||||
|
||||
#define w_char_eq(a,b) (((a).l == (b).l) && ((a).h == (b).h))
|
||||
|
||||
#endif
|
||||
|
@ -1,182 +0,0 @@
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <ctype.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "dictmgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
DictMgr::DictMgr(const char * dictpath, const char * etype) : numdict(0)
|
||||
{
|
||||
// load list of etype entries
|
||||
pdentry = (dictentry *)malloc(MAXDICTIONARIES*sizeof(struct dictentry));
|
||||
if (pdentry) {
|
||||
if (parse_file(dictpath, etype)) {
|
||||
numdict = 0;
|
||||
// no dictionary.lst found is okay
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
DictMgr::~DictMgr()
|
||||
{
|
||||
dictentry * pdict = NULL;
|
||||
if (pdentry) {
|
||||
pdict = pdentry;
|
||||
for (int i=0;i<numdict;i++) {
|
||||
if (pdict->lang) {
|
||||
free(pdict->lang);
|
||||
pdict->lang = NULL;
|
||||
}
|
||||
if (pdict->region) {
|
||||
free(pdict->region);
|
||||
pdict->region=NULL;
|
||||
}
|
||||
if (pdict->filename) {
|
||||
free(pdict->filename);
|
||||
pdict->filename = NULL;
|
||||
}
|
||||
pdict++;
|
||||
}
|
||||
free(pdentry);
|
||||
pdentry = NULL;
|
||||
pdict = NULL;
|
||||
}
|
||||
numdict = 0;
|
||||
}
|
||||
|
||||
|
||||
// read in list of etype entries and build up structure to describe them
|
||||
int DictMgr::parse_file(const char * dictpath, const char * etype)
|
||||
{
|
||||
|
||||
int i;
|
||||
char line[MAXDICTENTRYLEN+1];
|
||||
dictentry * pdict = pdentry;
|
||||
|
||||
// open the dictionary list file
|
||||
FILE * dictlst;
|
||||
dictlst = myfopen(dictpath,"r");
|
||||
if (!dictlst) {
|
||||
return 1;
|
||||
}
|
||||
|
||||
// step one is to parse the dictionary list building up the
|
||||
// descriptive structures
|
||||
|
||||
// read in each line, ignoring any that don't start with etype
|
||||
while (fgets(line,MAXDICTENTRYLEN,dictlst)) {
|
||||
mychomp(line);
|
||||
|
||||
/* parse in a dictionary entry */
|
||||
if (strncmp(line,etype,4) == 0) {
|
||||
if (numdict < MAXDICTIONARIES) {
|
||||
char * tp = line;
|
||||
char * piece;
|
||||
i = 0;
|
||||
while ((piece=mystrsep(&tp,' '))) {
|
||||
if (*piece != '\0') {
|
||||
switch(i) {
|
||||
case 0: break;
|
||||
case 1: pdict->lang = mystrdup(piece); break;
|
||||
case 2: if (strcmp (piece, "ANY") == 0)
|
||||
pdict->region = mystrdup("");
|
||||
else
|
||||
pdict->region = mystrdup(piece);
|
||||
break;
|
||||
case 3: pdict->filename = mystrdup(piece); break;
|
||||
default: break;
|
||||
}
|
||||
i++;
|
||||
}
|
||||
free(piece);
|
||||
}
|
||||
if (i == 4) {
|
||||
numdict++;
|
||||
pdict++;
|
||||
} else {
|
||||
switch (i) {
|
||||
case 3:
|
||||
free(pdict->region);
|
||||
pdict->region=NULL;
|
||||
/* FALLTHROUGH */
|
||||
case 2:
|
||||
free(pdict->lang);
|
||||
pdict->lang=NULL;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
fprintf(stderr,"dictionary list corruption in line \"%s\"\n",line);
|
||||
fflush(stderr);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fclose(dictlst);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// store the dictionary entry list in *ppentry and return the number of entries
|
||||
int DictMgr::get_list(dictentry ** ppentry)
|
||||
{
|
||||
*ppentry = pdentry;
|
||||
return numdict;
|
||||
}
|
||||
|
||||
|
||||
|
||||
// split a string into tokens based on a single char delimiter
|
||||
// acts like strsep() but only uses a delim char and not
|
||||
// a delim string
|
||||
|
||||
char * DictMgr::mystrsep(char ** stringp, const char delim)
|
||||
{
|
||||
char * rv = NULL;
|
||||
char * mp = *stringp;
|
||||
size_t n = strlen(mp);
|
||||
if (n > 0) {
|
||||
char * dp = (char *)memchr(mp,(int)((unsigned char)delim),n);
|
||||
if (dp) {
|
||||
*stringp = dp+1;
|
||||
size_t nc = dp - mp;
|
||||
rv = (char *) malloc(nc+1);
|
||||
if (rv) {
|
||||
memcpy(rv,mp,nc);
|
||||
*(rv+nc) = '\0';
|
||||
}
|
||||
} else {
|
||||
rv = (char *) malloc(n+1);
|
||||
if (rv) {
|
||||
memcpy(rv, mp, n);
|
||||
*(rv+n) = '\0';
|
||||
*stringp = mp + n;
|
||||
}
|
||||
}
|
||||
}
|
||||
return rv;
|
||||
}
|
||||
|
||||
|
||||
// replaces strdup with ansi version
|
||||
char * DictMgr::mystrdup(const char * s)
|
||||
{
|
||||
char * d = NULL;
|
||||
if (s) {
|
||||
int sl = strlen(s)+1;
|
||||
d = (char *) malloc(sl);
|
||||
if (d) memcpy(d,s,sl);
|
||||
}
|
||||
return d;
|
||||
}
|
||||
|
||||
|
||||
// remove cross-platform text line end characters
|
||||
void DictMgr:: mychomp(char * s)
|
||||
{
|
||||
int k = strlen(s);
|
||||
if ((k > 0) && ((*(s+k-1)=='\r') || (*(s+k-1)=='\n'))) *(s+k-1) = '\0';
|
||||
if ((k > 1) && (*(s+k-2) == '\r')) *(s+k-2) = '\0';
|
||||
}
|
||||
|
@ -1,39 +0,0 @@
|
||||
#ifndef _DICTMGR_HXX_
|
||||
#define _DICTMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#define MAXDICTIONARIES 100
|
||||
#define MAXDICTENTRYLEN 1024
|
||||
|
||||
struct dictentry {
|
||||
char * filename;
|
||||
char * lang;
|
||||
char * region;
|
||||
};
|
||||
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED DictMgr
|
||||
{
|
||||
private:
|
||||
DictMgr(const DictMgr&);
|
||||
DictMgr& operator = (const DictMgr&);
|
||||
private:
|
||||
int numdict;
|
||||
dictentry * pdentry;
|
||||
|
||||
public:
|
||||
|
||||
DictMgr(const char * dictpath, const char * etype);
|
||||
~DictMgr();
|
||||
int get_list(dictentry** ppentry);
|
||||
|
||||
private:
|
||||
int parse_file(const char * dictpath, const char * etype);
|
||||
char * mystrsep(char ** stringp, const char delim);
|
||||
char * mystrdup(const char * s);
|
||||
void mychomp(char * s);
|
||||
|
||||
};
|
||||
|
||||
#endif
|
@ -1,5 +1,75 @@
|
||||
#include "license.hunspell"
|
||||
#include "license.myspell"
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
@ -8,46 +78,43 @@
|
||||
#include "filemgr.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
int FileMgr::fail(const char * err, const char * par) {
|
||||
fprintf(stderr, err, par);
|
||||
return -1;
|
||||
int FileMgr::fail(const char* err, const char* par) {
|
||||
fprintf(stderr, err, par);
|
||||
return -1;
|
||||
}
|
||||
|
||||
FileMgr::FileMgr(const char * file, const char * key)
|
||||
: hin(NULL)
|
||||
, linenum(0)
|
||||
{
|
||||
in[0] = '\0';
|
||||
FileMgr::FileMgr(const char* file, const char* key) : hin(NULL), linenum(0) {
|
||||
in[0] = '\0';
|
||||
|
||||
fin = myfopen(file, "r");
|
||||
if (!fin) {
|
||||
// check hzipped file
|
||||
char * st = (char *) malloc(strlen(file) + strlen(HZIP_EXTENSION) + 1);
|
||||
if (st) {
|
||||
strcpy(st, file);
|
||||
strcat(st, HZIP_EXTENSION);
|
||||
hin = new Hunzip(st, key);
|
||||
free(st);
|
||||
}
|
||||
}
|
||||
if (!fin && !hin) fail(MSG_OPEN, file);
|
||||
fin = myfopen(file, "r");
|
||||
if (!fin) {
|
||||
// check hzipped file
|
||||
std::string st(file);
|
||||
st.append(HZIP_EXTENSION);
|
||||
hin = new Hunzip(st.c_str(), key);
|
||||
}
|
||||
if (!fin && !hin)
|
||||
fail(MSG_OPEN, file);
|
||||
}
|
||||
|
||||
FileMgr::~FileMgr()
|
||||
{
|
||||
if (fin) fclose(fin);
|
||||
if (hin) delete hin;
|
||||
FileMgr::~FileMgr() {
|
||||
if (fin)
|
||||
fclose(fin);
|
||||
if (hin)
|
||||
delete hin;
|
||||
}
|
||||
|
||||
char * FileMgr::getline() {
|
||||
const char * l;
|
||||
linenum++;
|
||||
if (fin) return fgets(in, BUFSIZE - 1, fin);
|
||||
if (hin && ((l = hin->getline()) != NULL)) return strcpy(in, l);
|
||||
linenum--;
|
||||
return NULL;
|
||||
char* FileMgr::getline() {
|
||||
const char* l;
|
||||
linenum++;
|
||||
if (fin)
|
||||
return fgets(in, BUFSIZE - 1, fin);
|
||||
if (hin && ((l = hin->getline()) != NULL))
|
||||
return strcpy(in, l);
|
||||
linenum--;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int FileMgr::getlinenum() {
|
||||
return linenum;
|
||||
return linenum;
|
||||
}
|
||||
|
@ -1,3 +1,76 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* file manager class - read lines of files [filename] OR [filename.hz] */
|
||||
#ifndef _FILEMGR_HXX_
|
||||
#define _FILEMGR_HXX_
|
||||
@ -7,22 +80,22 @@
|
||||
#include "hunzip.hxx"
|
||||
#include <stdio.h>
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr
|
||||
{
|
||||
private:
|
||||
FileMgr(const FileMgr&);
|
||||
FileMgr& operator = (const FileMgr&);
|
||||
protected:
|
||||
FILE * fin;
|
||||
Hunzip * hin;
|
||||
char in[BUFSIZE + 50]; // input buffer
|
||||
int fail(const char * err, const char * par);
|
||||
int linenum;
|
||||
class LIBHUNSPELL_DLL_EXPORTED FileMgr {
|
||||
private:
|
||||
FileMgr(const FileMgr&);
|
||||
FileMgr& operator=(const FileMgr&);
|
||||
|
||||
public:
|
||||
FileMgr(const char * filename, const char * key = NULL);
|
||||
~FileMgr();
|
||||
char * getline();
|
||||
int getlinenum();
|
||||
protected:
|
||||
FILE* fin;
|
||||
Hunzip* hin;
|
||||
char in[BUFSIZE + 50]; // input buffer
|
||||
int fail(const char* err, const char* par);
|
||||
int linenum;
|
||||
|
||||
public:
|
||||
FileMgr(const char* filename, const char* key = NULL);
|
||||
~FileMgr();
|
||||
char* getline();
|
||||
int getlinenum();
|
||||
};
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,69 +1,149 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _HASHMGR_HXX_
|
||||
#define _HASHMGR_HXX_
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
#include "htypes.hxx"
|
||||
#include "filemgr.hxx"
|
||||
#include "w_char.hxx"
|
||||
|
||||
enum flag { FLAG_CHAR, FLAG_LONG, FLAG_NUM, FLAG_UNI };
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr
|
||||
{
|
||||
int tablesize;
|
||||
struct hentry ** tableptr;
|
||||
int userword;
|
||||
flag flag_mode;
|
||||
int complexprefixes;
|
||||
int utf8;
|
||||
unsigned short forbiddenword;
|
||||
int langnum;
|
||||
char * enc;
|
||||
char * lang;
|
||||
struct cs_info * csconv;
|
||||
char * ignorechars;
|
||||
unsigned short * ignorechars_utf16;
|
||||
int ignorechars_utf16_len;
|
||||
int numaliasf; // flag vector `compression' with aliases
|
||||
unsigned short ** aliasf;
|
||||
unsigned short * aliasflen;
|
||||
int numaliasm; // morphological desciption `compression' with aliases
|
||||
char ** aliasm;
|
||||
class LIBHUNSPELL_DLL_EXPORTED HashMgr {
|
||||
int tablesize;
|
||||
struct hentry** tableptr;
|
||||
flag flag_mode;
|
||||
int complexprefixes;
|
||||
int utf8;
|
||||
unsigned short forbiddenword;
|
||||
int langnum;
|
||||
char* enc;
|
||||
char* lang;
|
||||
struct cs_info* csconv;
|
||||
char* ignorechars;
|
||||
std::vector<w_char> ignorechars_utf16;
|
||||
int numaliasf; // flag vector `compression' with aliases
|
||||
unsigned short** aliasf;
|
||||
unsigned short* aliasflen;
|
||||
int numaliasm; // morphological desciption `compression' with aliases
|
||||
char** aliasm;
|
||||
|
||||
|
||||
public:
|
||||
HashMgr(const char * tpath, const char * apath, const char * key = NULL);
|
||||
public:
|
||||
HashMgr(const char* tpath, const char* apath, const char* key = NULL);
|
||||
~HashMgr();
|
||||
|
||||
struct hentry * lookup(const char *) const;
|
||||
int hash(const char *) const;
|
||||
struct hentry * walk_hashtable(int & col, struct hentry * hp) const;
|
||||
struct hentry* lookup(const char*) const;
|
||||
int hash(const char*) const;
|
||||
struct hentry* walk_hashtable(int& col, struct hentry* hp) const;
|
||||
|
||||
int add(const char * word);
|
||||
int add_with_affix(const char * word, const char * pattern);
|
||||
int remove(const char * word);
|
||||
int decode_flags(unsigned short ** result, char * flags, FileMgr * af);
|
||||
unsigned short decode_flag(const char * flag);
|
||||
char * encode_flag(unsigned short flag);
|
||||
int add(const std::string& word);
|
||||
int add_with_affix(const char* word, const char* pattern);
|
||||
int remove(const char* word);
|
||||
int decode_flags(unsigned short** result, char* flags, FileMgr* af);
|
||||
unsigned short decode_flag(const char* flag);
|
||||
char* encode_flag(unsigned short flag);
|
||||
int is_aliasf();
|
||||
int get_aliasf(int index, unsigned short ** fvec, FileMgr * af);
|
||||
int get_aliasf(int index, unsigned short** fvec, FileMgr* af);
|
||||
int is_aliasm();
|
||||
char * get_aliasm(int index);
|
||||
|
||||
private:
|
||||
int get_clen_and_captype(const char * word, int wbl, int * captype);
|
||||
int load_tables(const char * tpath, const char * key);
|
||||
int add_word(const char * word, int wbl, int wcl, unsigned short * ap,
|
||||
int al, const char * desc, bool onlyupcase);
|
||||
int load_config(const char * affpath, const char * key);
|
||||
int parse_aliasf(char * line, FileMgr * af);
|
||||
int add_hidden_capitalized_word(char * word, int wbl, int wcl,
|
||||
unsigned short * flags, int al, char * dp, int captype);
|
||||
int parse_aliasm(char * line, FileMgr * af);
|
||||
int remove_forbidden_flag(const char * word);
|
||||
char* get_aliasm(int index);
|
||||
|
||||
private:
|
||||
int get_clen_and_captype(const std::string& word, int* captype);
|
||||
int load_tables(const char* tpath, const char* key);
|
||||
int add_word(const char* word,
|
||||
int wbl,
|
||||
int wcl,
|
||||
unsigned short* ap,
|
||||
int al,
|
||||
const char* desc,
|
||||
bool onlyupcase);
|
||||
int load_config(const char* affpath, const char* key);
|
||||
int parse_aliasf(char* line, FileMgr* af);
|
||||
int add_hidden_capitalized_word(const std::string& word,
|
||||
int wcl,
|
||||
unsigned short* flags,
|
||||
int al,
|
||||
char* dp,
|
||||
int captype);
|
||||
int parse_aliasm(char* line, FileMgr* af);
|
||||
int remove_forbidden_flag(const std::string& word);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,15 +1,55 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _HTYPES_HXX_
|
||||
#define _HTYPES_HXX_
|
||||
|
||||
#define ROTATE_LEN 5
|
||||
#define ROTATE_LEN 5
|
||||
|
||||
#define ROTATE(v,q) \
|
||||
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q))-1));
|
||||
// Rotates `v` left by `q` bits in place, treating `v` as a 32-bit quantity:
// the bits shifted out on the left are brought back in on the right, and the
// `& ((1 << (q)) - 1)` mask keeps only those low `q` bits of the right shift.
// NOTE(review): `q` is not parenthesized in `(32 - q)`, and the expansion ends
// with its own ';' — pass a plain constant or identifier for `q`, and be
// careful when using the macro as the body of an unbraced if/else.
#define ROTATE(v, q) \
|
||||
(v) = ((v) << (q)) | (((v) >> (32 - q)) & ((1 << (q)) - 1));
|
||||
|
||||
// hentry options
|
||||
#define H_OPT (1 << 0)
|
||||
#define H_OPT (1 << 0)
|
||||
#define H_OPT_ALIASM (1 << 1)
|
||||
#define H_OPT_PHON (1 << 2)
|
||||
#define H_OPT_PHON (1 << 2)
|
||||
|
||||
// see also csutil.hxx
|
||||
#define HENTRY_WORD(h) &(h->word[0])
|
||||
@ -17,16 +57,15 @@
|
||||
// approx. number of user defined words
|
||||
#define USERWORD 1000
|
||||
|
||||
struct hentry
|
||||
{
|
||||
unsigned char blen; // word length in bytes
|
||||
unsigned char clen; // word length in characters (different for UTF-8 enc.)
|
||||
short alen; // length of affix flag vector
|
||||
unsigned short * astr; // affix flag vector
|
||||
struct hentry * next; // next word with same hash code
|
||||
struct hentry * next_homonym; // next homonym word (with same hash code)
|
||||
char var; // variable fields (only for special pronounciation yet)
|
||||
char word[1]; // variable-length word (8-bit or UTF-8 encoding)
|
||||
// One dictionary word entry. Entries that share a hash code are chained
// through `next`; homonyms of the same word are chained through
// `next_homonym`.
struct hentry {
|
||||
unsigned char blen; // word length in bytes
|
||||
unsigned char clen; // word length in characters (different for UTF-8 enc.)
|
||||
short alen; // length of affix flag vector
|
||||
unsigned short* astr; // affix flag vector
|
||||
struct hentry* next; // next word with same hash code
|
||||
struct hentry* next_homonym; // next homonym word (with same hash code)
|
||||
char var; // variable fields (currently only used for special pronunciation); presumably a mask of the H_OPT* flags -- TODO confirm
|
||||
char word[1]; // variable-length word (8-bit or UTF-8 encoding); declared with one element, see HENTRY_WORD -- presumably allocated with extra room past the struct, confirm in hashmgr
|
||||
};
|
||||
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,3 +1,43 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _MYSPELLMGR_H_
|
||||
#define _MYSPELLMGR_H_
|
||||
|
||||
@ -9,19 +49,26 @@ extern "C" {
|
||||
|
||||
typedef struct Hunhandle Hunhandle;
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create(const char * affpath, const char * dpath);
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create(const char* affpath,
|
||||
const char* dpath);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle *Hunspell_create_key(const char * affpath, const char * dpath,
|
||||
const char * key);
|
||||
LIBHUNSPELL_DLL_EXPORTED Hunhandle* Hunspell_create_key(const char* affpath,
|
||||
const char* dpath,
|
||||
const char* key);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle *pHunspell);
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_destroy(Hunhandle* pHunspell);
|
||||
|
||||
/* load extra dictionaries (only dic files)
|
||||
* output: 0 = additional dictionary slots available, 1 = slots are now full*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_dic(Hunhandle* pHunspell,
|
||||
const char* dpath);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle *pHunspell, const char *);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_spell(Hunhandle* pHunspell, const char*);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||
LIBHUNSPELL_DLL_EXPORTED char* Hunspell_get_dic_encoding(Hunhandle* pHunspell);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
@ -30,63 +77,83 @@ LIBHUNSPELL_DLL_EXPORTED char *Hunspell_get_dic_encoding(Hunhandle *pHunspell);
|
||||
* a newly allocated array of strings (*slst will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_suggest(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* morphological functions */
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_analyze(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* stem(result, word) - stemmer function */
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle *pHunspell, char*** slst, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = Hunspell_analyze(result, "words");
|
||||
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||
*/
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = Hunspell_analyze(result, "words");
|
||||
* int n2 = Hunspell_stem2(result2, result, n1);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle *pHunspell, char*** slst, char** desc, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_stem2(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
char** desc,
|
||||
int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
const char * word2);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word,
|
||||
const char* word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends from dictionaries, too
|
||||
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends on the dictionary, too
|
||||
* int n = Hunspell_generate2(result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle *pHunspell, char*** slst, const char * word,
|
||||
char** desc, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_generate2(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
const char* word,
|
||||
char** desc,
|
||||
int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle *pHunspell, const char * word);
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle *pHunspell, const char * word, const char * example);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add(Hunhandle* pHunspell,
|
||||
const char* word);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle *pHunspell, const char * word);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_add_with_affix(Hunhandle* pHunspell,
|
||||
const char* word,
|
||||
const char* example);
|
||||
|
||||
/* free suggestion lists */
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle *pHunspell, char *** slst, int n);
|
||||
LIBHUNSPELL_DLL_EXPORTED int Hunspell_remove(Hunhandle* pHunspell,
|
||||
const char* word);
|
||||
|
||||
/* free suggestion lists */
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void Hunspell_free_list(Hunhandle* pHunspell,
|
||||
char*** slst,
|
||||
int n);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
@ -1,42 +1,115 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
#include "hashmgr.hxx"
|
||||
#include "affixmgr.hxx"
|
||||
#include "suggestmgr.hxx"
|
||||
#include "langnum.hxx"
|
||||
#include <vector>
|
||||
|
||||
#define SPELL_XML "<?xml?>"
|
||||
#define SPELL_XML "<?xml?>"
|
||||
|
||||
#define MAXDIC 20
|
||||
#define MAXSUGGESTION 15
|
||||
#define MAXSHARPS 5
|
||||
|
||||
#define HUNSPELL_OK (1 << 0)
|
||||
#define HUNSPELL_OK_WARN (1 << 1)
|
||||
#define HUNSPELL_OK (1 << 0)
|
||||
#define HUNSPELL_OK_WARN (1 << 1)
|
||||
|
||||
#ifndef _MYSPELLMGR_HXX_
|
||||
#define _MYSPELLMGR_HXX_
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell
|
||||
{
|
||||
private:
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunspell {
|
||||
private:
|
||||
Hunspell(const Hunspell&);
|
||||
Hunspell& operator = (const Hunspell&);
|
||||
private:
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
int maxdic;
|
||||
SuggestMgr* pSMgr;
|
||||
char * affixpath;
|
||||
char * encoding;
|
||||
struct cs_info * csconv;
|
||||
int langnum;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
char** wordbreak;
|
||||
Hunspell& operator=(const Hunspell&);
|
||||
|
||||
public:
|
||||
private:
|
||||
AffixMgr* pAMgr;
|
||||
HashMgr* pHMgr[MAXDIC];
|
||||
int maxdic;
|
||||
SuggestMgr* pSMgr;
|
||||
char* affixpath;
|
||||
char* encoding;
|
||||
struct cs_info* csconv;
|
||||
int langnum;
|
||||
int utf8;
|
||||
int complexprefixes;
|
||||
char** wordbreak;
|
||||
|
||||
public:
|
||||
/* Hunspell(aff, dic) - constructor of Hunspell class
|
||||
* input: path of affix file and dictionary file
|
||||
*
|
||||
@ -46,23 +119,23 @@ public:
|
||||
* with system-dependent character encoding instead of _wfopen()).
|
||||
*/
|
||||
|
||||
Hunspell(const char * affpath, const char * dpath, const char * key = NULL);
|
||||
Hunspell(const char* affpath, const char* dpath, const char* key = NULL);
|
||||
~Hunspell();
|
||||
|
||||
/* load extra dictionaries (only dic files) */
|
||||
int add_dic(const char * dpath, const char * key = NULL);
|
||||
int add_dic(const char* dpath, const char* key = NULL);
|
||||
|
||||
/* spell(word) - spellcheck word
|
||||
* output: 0 = bad word, not 0 = good word
|
||||
*
|
||||
*
|
||||
* plus output:
|
||||
* info: information bit array, fields:
|
||||
* SPELL_COMPOUND = a compound word
|
||||
* SPELL_COMPOUND = a compound word
|
||||
* SPELL_FORBIDDEN = an explicit forbidden word
|
||||
* root: root (stem), when input is a word with affix(es)
|
||||
*/
|
||||
|
||||
int spell(const char * word, int * info = NULL, char ** root = NULL);
|
||||
|
||||
int spell(const char* word, int* info = NULL, char** root = NULL);
|
||||
|
||||
/* suggest(suggestions, word) - search suggestions
|
||||
* input: pointer to an array of strings pointer and the (bad) word
|
||||
@ -72,113 +145,114 @@ public:
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
|
||||
int suggest(char*** slst, const char * word);
|
||||
int suggest(char*** slst, const char* word);
|
||||
|
||||
/* Suggest words from suffix rules
|
||||
* suffix_suggest(suggestions, root_word)
|
||||
* input: pointer to an array of strings pointer and the word
|
||||
* array of strings pointer (here *slst) may not be initialized
|
||||
* output: number of suggestions in string array, and suggestions in
|
||||
* a newly allocated array of strings (*slst will be NULL when number
|
||||
* of suggestion equals 0.)
|
||||
*/
|
||||
int suffix_suggest(char*** slst, const char* root_word);
|
||||
|
||||
/* deallocate suggestion lists */
|
||||
|
||||
void free_list(char *** slst, int n);
|
||||
void free_list(char*** slst, int n);
|
||||
|
||||
char * get_dic_encoding();
|
||||
char* get_dic_encoding();
|
||||
|
||||
/* morphological functions */
|
||||
/* morphological functions */
|
||||
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
int analyze(char*** slst, const char * word);
|
||||
/* analyze(result, word) - morphological analysis of the word */
|
||||
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
int stem(char*** slst, const char * word);
|
||||
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = analyze(&result, "words");
|
||||
* int n2 = stem(&result2, result, n1);
|
||||
*/
|
||||
|
||||
int stem(char*** slst, char ** morph, int n);
|
||||
int analyze(char*** slst, const char* word);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
/* stem(result, word) - stemmer function */
|
||||
|
||||
int generate(char*** slst, const char * word, const char * word2);
|
||||
int stem(char*** slst, const char* word);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends from dictionaries, too
|
||||
* int n = generate(&result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
/* stem(result, analysis, n) - get stems from a morph. analysis
|
||||
* example:
|
||||
* char ** result, result2;
|
||||
* int n1 = analyze(&result, "words");
|
||||
* int n2 = stem(&result2, result, n1);
|
||||
*/
|
||||
|
||||
int generate(char*** slst, const char * word, char ** desc, int n);
|
||||
int stem(char*** slst, char** morph, int n);
|
||||
|
||||
/* generate(result, word, word2) - morphological generation by example(s) */
|
||||
|
||||
int generate(char*** slst, const char* word, const char* word2);
|
||||
|
||||
/* generate(result, word, desc, n) - generation by morph. description(s)
|
||||
* example:
|
||||
* char ** result;
|
||||
* char * affix = "is:plural"; // description depends on the dictionary, too
|
||||
* int n = generate(&result, "word", &affix, 1);
|
||||
* for (int i = 0; i < n; i++) printf("%s\n", result[i]);
|
||||
*/
|
||||
|
||||
int generate(char*** slst, const char* word, char** desc, int n);
|
||||
|
||||
/* functions for run-time modification of the dictionary */
|
||||
|
||||
/* add word to the run-time dictionary */
|
||||
|
||||
int add(const char * word);
|
||||
|
||||
int add(const char* word);
|
||||
|
||||
/* add word to the run-time dictionary with affix flags of
|
||||
* the example (a dictionary word): Hunspell will recognize
|
||||
* affixed forms of the new word, too.
|
||||
*/
|
||||
|
||||
int add_with_affix(const char * word, const char * example);
|
||||
|
||||
int add_with_affix(const char* word, const char* example);
|
||||
|
||||
/* remove word from the run-time dictionary */
|
||||
|
||||
int remove(const char * word);
|
||||
int remove(const char* word);
|
||||
|
||||
/* other */
|
||||
|
||||
/* get extra word characters defined in affix file for tokenization */
|
||||
const char * get_wordchars();
|
||||
unsigned short * get_wordchars_utf16(int * len);
|
||||
const char* get_wordchars();
|
||||
const std::vector<w_char>& get_wordchars_utf16();
|
||||
|
||||
struct cs_info * get_csconv();
|
||||
const char * get_version();
|
||||
struct cs_info* get_csconv();
|
||||
const char* get_version();
|
||||
|
||||
int get_langnum() const;
|
||||
|
||||
/* need for putdic */
|
||||
int input_conv(const char * word, char * dest);
|
||||
|
||||
/* experimental and deprecated functions */
|
||||
|
||||
#ifdef HUNSPELL_EXPERIMENTAL
|
||||
/* suffix is an affix flag string, similarly in dictionary files */
|
||||
int put_word_suffix(const char * word, const char * suffix);
|
||||
char * morph_with_correction(const char * word);
|
||||
|
||||
/* spec. suggestions */
|
||||
int suggest_auto(char*** slst, const char * word);
|
||||
int suggest_pos_stems(char*** slst, const char * word);
|
||||
#endif
|
||||
|
||||
private:
|
||||
int cleanword(char *, const char *, int * pcaptype, int * pabbrev);
|
||||
int cleanword2(char *, const char *, w_char *, int * w_len, int * pcaptype, int * pabbrev);
|
||||
void mkinitcap(char *);
|
||||
int mkinitcap2(char * p, w_char * u, int nc);
|
||||
int mkinitsmall2(char * p, w_char * u, int nc);
|
||||
void mkallcap(char *);
|
||||
int mkallcap2(char * p, w_char * u, int nc);
|
||||
void mkallsmall(char *);
|
||||
int mkallsmall2(char * p, w_char * u, int nc);
|
||||
struct hentry * checkword(const char *, int * info, char **root);
|
||||
char * sharps_u8_l1(char * dest, char * source);
|
||||
hentry * spellsharps(char * base, char *, int, int, char * tmp, int * info, char **root);
|
||||
int is_keepcase(const hentry * rv);
|
||||
int insert_sug(char ***slst, char * word, int ns);
|
||||
void cat_result(char * result, char * st);
|
||||
char * stem_description(const char * desc);
|
||||
int spellml(char*** slst, const char * word);
|
||||
int get_xml_par(char * dest, const char * par, int maxl);
|
||||
const char * get_xml_pos(const char * s, const char * attr);
|
||||
int get_xml_list(char ***slst, char * list, const char * tag);
|
||||
int check_xml_par(const char * q, const char * attr, const char * value);
|
||||
int input_conv(const char* word, char* dest, size_t destsize);
|
||||
|
||||
private:
|
||||
void cleanword(std::string& dest, const char*, int* pcaptype, int* pabbrev);
|
||||
size_t cleanword2(std::string& dest,
|
||||
std::vector<w_char>& dest_u,
|
||||
const char*,
|
||||
int* w_len,
|
||||
int* pcaptype,
|
||||
size_t* pabbrev);
|
||||
void mkinitcap(std::string& u8);
|
||||
int mkinitcap2(std::string& u8, std::vector<w_char>& u16);
|
||||
int mkinitsmall2(std::string& u8, std::vector<w_char>& u16);
|
||||
void mkallcap(std::string& u8);
|
||||
int mkallsmall2(std::string& u8, std::vector<w_char>& u16);
|
||||
struct hentry* checkword(const char*, int* info, char** root);
|
||||
std::string sharps_u8_l1(const std::string& source);
|
||||
hentry*
|
||||
spellsharps(std::string& base, size_t start_pos, int, int, int* info, char** root);
|
||||
int is_keepcase(const hentry* rv);
|
||||
int insert_sug(char*** slst, const char* word, int ns);
|
||||
void cat_result(std::string& result, char* st);
|
||||
char* stem_description(const char* desc);
|
||||
int spellml(char*** slst, const char* word);
|
||||
std::string get_xml_par(const char* par);
|
||||
const char* get_xml_pos(const char* s, const char* attr);
|
||||
int get_xml_list(char*** slst, const char* list, const char* tag);
|
||||
int check_xml_par(const char* q, const char* attr, const char* value);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,11 +1,51 @@
|
||||
#include <stdlib.h>
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
|
||||
#include "hunzip.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
#define CODELEN 65536
|
||||
#define CODELEN 65536
|
||||
#define BASEBITREC 5000
|
||||
|
||||
#define UNCOMPRESSED '\002'
|
||||
@ -13,184 +53,211 @@
|
||||
#define MAGIC_ENCRYPT "hz1"
|
||||
#define MAGICLEN (sizeof(MAGIC) - 1)
|
||||
|
||||
int Hunzip::fail(const char * err, const char * par) {
|
||||
fprintf(stderr, err, par);
|
||||
int Hunzip::fail(const char* err, const char* par) {
|
||||
fprintf(stderr, err, par);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Construct a decoder for the hzip file `file`.
//
// `key` is the password used for encrypted files (may be NULL).  The file
// name is duplicated, the header/code table is read via getcode(), and the
// first chunk of output is decoded via getbuf().  On failure `bufsiz` is left
// at -1, which makes getline() return NULL.
Hunzip::Hunzip(const char* file, const char* key)
    : fin(NULL),
      bufsiz(0),
      lastbit(0),
      inc(0),
      inbits(0),
      outc(0),
      dec(NULL) {
  // Begin with empty input, output and line buffers.
  line[0] = '\0';
  out[0] = '\0';
  in[0] = '\0';

  filename = mystrdup(file);

  // Only decode the first buffer when the header was read successfully.
  bufsiz = (getcode(key) == -1) ? -1 : getbuf();
}
|
||||
|
||||
int Hunzip::getcode(const char* key) {
|
||||
unsigned char c[2];
|
||||
int i, j, n, p;
|
||||
int allocatedbit = BASEBITREC;
|
||||
const char* enc = key;
|
||||
|
||||
if (!filename)
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set up a decoder for the hzip file `file`; `key` is the optional password
// for encrypted files.  Leaves `bufsiz` at -1 when the header cannot be read,
// otherwise at the value returned by the first getbuf() call.
Hunzip::Hunzip(const char * file, const char * key)
    : fin(NULL), bufsiz(0), lastbit(0), inc(0), inbits(0), outc(0), dec(NULL)
{
    // All three working buffers start out as empty strings.
    in[0] = out[0] = line[0] = '\0';
    filename = mystrdup(file);

    // Header / code table could not be read: mark the object as failed.
    if (getcode(key) == -1) {
        bufsiz = -1;
        return;
    }

    bufsiz = getbuf();
}
|
||||
fin = myfopen(filename, "rb");
|
||||
if (!fin)
|
||||
return -1;
|
||||
|
||||
int Hunzip::getcode(const char * key) {
|
||||
unsigned char c[2];
|
||||
int i, j, n, p;
|
||||
int allocatedbit = BASEBITREC;
|
||||
const char * enc = key;
|
||||
// read magic number
|
||||
if ((fread(in, 1, 3, fin) < MAGICLEN) ||
|
||||
!(strncmp(MAGIC, in, MAGICLEN) == 0 ||
|
||||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
if (!filename) return -1;
|
||||
// check encryption
|
||||
if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
|
||||
unsigned char cs;
|
||||
if (!key)
|
||||
return fail(MSG_KEY, filename);
|
||||
if (fread(&c, 1, 1, fin) < 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
for (cs = 0; *enc; enc++)
|
||||
cs ^= *enc;
|
||||
if (cs != c[0])
|
||||
return fail(MSG_KEY, filename);
|
||||
enc = key;
|
||||
} else
|
||||
key = NULL;
|
||||
|
||||
fin = myfopen(filename, "rb");
|
||||
if (!fin) return -1;
|
||||
// read record count
|
||||
if (fread(&c, 1, 2, fin) < 2)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
|
||||
// read magic number
|
||||
if ((fread(in, 1, 3, fin) < MAGICLEN)
|
||||
|| !(strncmp(MAGIC, in, MAGICLEN) == 0 ||
|
||||
strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0)) {
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
if (key) {
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
|
||||
// check encryption
|
||||
if (strncmp(MAGIC_ENCRYPT, in, MAGICLEN) == 0) {
|
||||
unsigned char cs;
|
||||
if (!key) return fail(MSG_KEY, filename);
|
||||
if (fread(&c, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
|
||||
for (cs = 0; *enc; enc++) cs ^= *enc;
|
||||
if (cs != c[0]) return fail(MSG_KEY, filename);
|
||||
enc = key;
|
||||
} else key = NULL;
|
||||
|
||||
// read record count
|
||||
if (fread(&c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
|
||||
n = ((int)c[0] << 8) + c[1];
|
||||
dec = (struct bit*)malloc(BASEBITREC * sizeof(struct bit));
|
||||
if (!dec)
|
||||
return fail(MSG_MEMORY, filename);
|
||||
dec[0].v[0] = 0;
|
||||
dec[0].v[1] = 0;
|
||||
|
||||
// read codes
|
||||
for (i = 0; i < n; i++) {
|
||||
unsigned char l;
|
||||
if (fread(c, 1, 2, fin) < 2)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
|
||||
n = ((int) c[0] << 8) + c[1];
|
||||
dec = (struct bit *) malloc(BASEBITREC * sizeof(struct bit));
|
||||
if (!dec) return fail(MSG_MEMORY, filename);
|
||||
dec[0].v[0] = 0;
|
||||
dec[0].v[1] = 0;
|
||||
|
||||
// read codes
|
||||
for (i = 0; i < n; i++) {
|
||||
unsigned char l;
|
||||
if (fread(c, 1, 2, fin) < 2) return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
if (fread(&l, 1, 1, fin) < 1) return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
l ^= *enc;
|
||||
}
|
||||
if (fread(in, 1, l/8+1, fin) < (size_t) l/8+1) return fail(MSG_FORMAT, filename);
|
||||
if (key) for (j = 0; j <= l/8; j++) {
|
||||
if (*(++enc) == '\0') enc = key;
|
||||
in[j] ^= *enc;
|
||||
}
|
||||
p = 0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int b = (in[j/8] & (1 << (7 - (j % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
lastbit++;
|
||||
if (lastbit == allocatedbit) {
|
||||
allocatedbit += BASEBITREC;
|
||||
dec = (struct bit *) realloc(dec, allocatedbit * sizeof(struct bit));
|
||||
}
|
||||
dec[lastbit].v[0] = 0;
|
||||
dec[lastbit].v[1] = 0;
|
||||
dec[oldp].v[b] = lastbit;
|
||||
p = lastbit;
|
||||
}
|
||||
}
|
||||
dec[p].c[0] = c[0];
|
||||
dec[p].c[1] = c[1];
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[0] ^= *enc;
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
c[1] ^= *enc;
|
||||
}
|
||||
return 0;
|
||||
if (fread(&l, 1, 1, fin) < 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
l ^= *enc;
|
||||
}
|
||||
if (fread(in, 1, l / 8 + 1, fin) < (size_t)l / 8 + 1)
|
||||
return fail(MSG_FORMAT, filename);
|
||||
if (key)
|
||||
for (j = 0; j <= l / 8; j++) {
|
||||
if (*(++enc) == '\0')
|
||||
enc = key;
|
||||
in[j] ^= *enc;
|
||||
}
|
||||
p = 0;
|
||||
for (j = 0; j < l; j++) {
|
||||
int b = (in[j / 8] & (1 << (7 - (j % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
lastbit++;
|
||||
if (lastbit == allocatedbit) {
|
||||
allocatedbit += BASEBITREC;
|
||||
dec = (struct bit*)realloc(dec, allocatedbit * sizeof(struct bit));
|
||||
}
|
||||
dec[lastbit].v[0] = 0;
|
||||
dec[lastbit].v[1] = 0;
|
||||
dec[oldp].v[b] = lastbit;
|
||||
p = lastbit;
|
||||
}
|
||||
}
|
||||
dec[p].c[0] = c[0];
|
||||
dec[p].c[1] = c[1];
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
Hunzip::~Hunzip()
|
||||
{
|
||||
if (dec) free(dec);
|
||||
if (fin) fclose(fin);
|
||||
if (filename) free(filename);
|
||||
// Release the code table, the input stream (if still open) and the
// duplicated file name.
Hunzip::~Hunzip() {
  // free(NULL) is a no-op, so no guard is needed for the heap pointers.
  free(dec);

  // fclose() must not be given NULL; getbuf() resets fin to NULL once it has
  // closed the stream at end of data.
  if (fin != NULL) {
    fclose(fin);
  }

  free(filename);
}
|
||||
|
||||
int Hunzip::getbuf() {
|
||||
int p = 0;
|
||||
int o = 0;
|
||||
do {
|
||||
if (inc == 0) inbits = fread(in, 1, BUFSIZE, fin) * 8;
|
||||
for (; inc < inbits; inc++) {
|
||||
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
if (oldp == lastbit) {
|
||||
fclose(fin);
|
||||
fin = NULL;
|
||||
// add last odd byte
|
||||
if (dec[lastbit].c[0]) out[o++] = dec[lastbit].c[1];
|
||||
return o;
|
||||
}
|
||||
out[o++] = dec[oldp].c[0];
|
||||
out[o++] = dec[oldp].c[1];
|
||||
if (o == BUFSIZE) return o;
|
||||
p = dec[p].v[b];
|
||||
}
|
||||
int p = 0;
|
||||
int o = 0;
|
||||
do {
|
||||
if (inc == 0)
|
||||
inbits = fread(in, 1, BUFSIZE, fin) * 8;
|
||||
for (; inc < inbits; inc++) {
|
||||
int b = (in[inc / 8] & (1 << (7 - (inc % 8)))) ? 1 : 0;
|
||||
int oldp = p;
|
||||
p = dec[p].v[b];
|
||||
if (p == 0) {
|
||||
if (oldp == lastbit) {
|
||||
fclose(fin);
|
||||
fin = NULL;
|
||||
// add last odd byte
|
||||
if (dec[lastbit].c[0])
|
||||
out[o++] = dec[lastbit].c[1];
|
||||
return o;
|
||||
}
|
||||
inc = 0;
|
||||
} while (inbits == BUFSIZE * 8);
|
||||
return fail(MSG_FORMAT, filename);
|
||||
out[o++] = dec[oldp].c[0];
|
||||
out[o++] = dec[oldp].c[1];
|
||||
if (o == BUFSIZE)
|
||||
return o;
|
||||
p = dec[p].v[b];
|
||||
}
|
||||
}
|
||||
inc = 0;
|
||||
} while (inbits == BUFSIZE * 8);
|
||||
return fail(MSG_FORMAT, filename);
|
||||
}
|
||||
|
||||
const char * Hunzip::getline() {
|
||||
char linebuf[BUFSIZE];
|
||||
int l = 0, eol = 0, left = 0, right = 0;
|
||||
if (bufsiz == -1) return NULL;
|
||||
while (l < bufsiz && !eol) {
|
||||
linebuf[l++] = out[outc];
|
||||
switch (out[outc]) {
|
||||
case '\t': break;
|
||||
case 31: { // escape
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
linebuf[l - 1] = out[outc];
|
||||
break;
|
||||
}
|
||||
case ' ': break;
|
||||
default: if (((unsigned char) out[outc]) < 47) {
|
||||
if (out[outc] > 32) {
|
||||
right = out[outc] - 31;
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
}
|
||||
if (out[outc] == 30) left = 9; else left = out[outc];
|
||||
linebuf[l-1] = '\n';
|
||||
eol = 1;
|
||||
}
|
||||
}
|
||||
const char* Hunzip::getline() {
|
||||
char linebuf[BUFSIZE];
|
||||
int l = 0, eol = 0, left = 0, right = 0;
|
||||
if (bufsiz == -1)
|
||||
return NULL;
|
||||
while (l < bufsiz && !eol) {
|
||||
linebuf[l++] = out[outc];
|
||||
switch (out[outc]) {
|
||||
case '\t':
|
||||
break;
|
||||
case 31: { // escape
|
||||
if (++outc == bufsiz) {
|
||||
outc = 0;
|
||||
bufsiz = fin ? getbuf(): -1;
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
linebuf[l - 1] = out[outc];
|
||||
break;
|
||||
}
|
||||
case ' ':
|
||||
break;
|
||||
default:
|
||||
if (((unsigned char)out[outc]) < 47) {
|
||||
if (out[outc] > 32) {
|
||||
right = out[outc] - 31;
|
||||
if (++outc == bufsiz) {
|
||||
bufsiz = getbuf();
|
||||
outc = 0;
|
||||
}
|
||||
}
|
||||
if (out[outc] == 30)
|
||||
left = 9;
|
||||
else
|
||||
left = out[outc];
|
||||
linebuf[l - 1] = '\n';
|
||||
eol = 1;
|
||||
}
|
||||
}
|
||||
if (right) strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
|
||||
else linebuf[l] = '\0';
|
||||
strcpy(line + left, linebuf);
|
||||
return line;
|
||||
if (++outc == bufsiz) {
|
||||
outc = 0;
|
||||
bufsiz = fin ? getbuf() : -1;
|
||||
}
|
||||
}
|
||||
if (right)
|
||||
strcpy(linebuf + l - 1, line + strlen(line) - right - 1);
|
||||
else
|
||||
linebuf[l] = '\0';
|
||||
strcpy(line + left, linebuf);
|
||||
return line;
|
||||
}
|
||||
|
@ -1,3 +1,43 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
/* hunzip: file decompression for sorted dictionaries with optional encryption,
|
||||
* algorithm: prefix-suffix encoding and 16-bit Huffman encoding */
|
||||
|
||||
@ -8,40 +48,40 @@
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
#define BUFSIZE 65536
|
||||
#define BUFSIZE 65536
|
||||
#define HZIP_EXTENSION ".hz"
|
||||
|
||||
#define MSG_OPEN "error: %s: cannot open\n"
|
||||
#define MSG_OPEN "error: %s: cannot open\n"
|
||||
#define MSG_FORMAT "error: %s: not in hzip format\n"
|
||||
#define MSG_MEMORY "error: %s: missing memory\n"
|
||||
#define MSG_KEY "error: %s: missing or bad password\n"
|
||||
#define MSG_KEY "error: %s: missing or bad password\n"
|
||||
|
||||
struct bit {
|
||||
unsigned char c[2];
|
||||
int v[2];
|
||||
unsigned char c[2];
|
||||
int v[2];
|
||||
};
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED Hunzip
|
||||
{
|
||||
private:
|
||||
Hunzip(const Hunzip&);
|
||||
Hunzip& operator = (const Hunzip&);
|
||||
protected:
|
||||
char * filename;
|
||||
FILE * fin;
|
||||
int bufsiz, lastbit, inc, inbits, outc;
|
||||
struct bit * dec; // code table
|
||||
char in[BUFSIZE]; // input buffer
|
||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||
char line[BUFSIZE + 50]; // decoded line
|
||||
int getcode(const char * key);
|
||||
int getbuf();
|
||||
int fail(const char * err, const char * par);
|
||||
|
||||
public:
|
||||
Hunzip(const char * filename, const char * key = NULL);
|
||||
~Hunzip();
|
||||
const char * getline();
|
||||
// Reader for hzip-compressed (optionally encrypted) dictionary files.
// The constructor reads the header and code table; getline() then returns
// the decoded text one line at a time.
class LIBHUNSPELL_DLL_EXPORTED Hunzip {
|
||||
private:
|
||||
// Copy construction and assignment are declared private so that instances
// cannot be copied from outside the class.
// NOTE(review): presumably intentionally left undefined -- confirm.
Hunzip(const Hunzip&);
|
||||
Hunzip& operator=(const Hunzip&);
|
||||
|
||||
protected:
|
||||
// Copy of the file name passed to the constructor (made with mystrdup()).
char* filename;
|
||||
// Input stream opened by getcode(); getbuf() closes it and resets it to NULL
// once the end of the data has been reached.
FILE* fin;
|
||||
// bufsiz:  number of decoded bytes available in out[], or -1 after a failure
// lastbit: index of the most recently added entry in dec[]
// inc:     current bit position inside in[]
// inbits:  number of bits read into in[] by the last fread()
// outc:    current read position inside out[]
int bufsiz, lastbit, inc, inbits, outc;
|
||||
struct bit* dec; // code table
|
||||
char in[BUFSIZE]; // input buffer
|
||||
char out[BUFSIZE + 1]; // Huffman-decoded buffer
|
||||
char line[BUFSIZE + 50]; // decoded line
|
||||
// Opens the file, checks the magic number (and the password checksum for
// encrypted files) and builds the code table in dec[].
// Returns 0 on success and -1 on failure.
int getcode(const char* key);
|
||||
// Decodes the next portion of the input into out[].
// Returns the number of bytes written, or -1 (via fail()) on a format error.
int getbuf();
|
||||
// Prints `err`, a printf-style format taking the single string `par`, to
// stderr and returns -1.
int fail(const char* err, const char* par);
|
||||
|
||||
public:
|
||||
// `filename` is the hzip file to read; `key` is the password for encrypted
// files and may be NULL.
Hunzip(const char* filename, const char* key = NULL);
|
||||
~Hunzip();
|
||||
// Returns the next decoded line (held in the member buffer `line`), or NULL
// when bufsiz is -1.
const char* getline();
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -1,3 +1,43 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef _LANGNUM_HXX_
|
||||
#define _LANGNUM_HXX_
|
||||
|
||||
@ -7,32 +47,32 @@
|
||||
*/
|
||||
|
||||
enum {
|
||||
LANG_ar=96,
|
||||
LANG_az=100, // custom number
|
||||
LANG_bg=41,
|
||||
LANG_ca=37,
|
||||
LANG_cs=42,
|
||||
LANG_da=45,
|
||||
LANG_de=49,
|
||||
LANG_el=30,
|
||||
LANG_en=01,
|
||||
LANG_es=34,
|
||||
LANG_eu=10,
|
||||
LANG_fr=02,
|
||||
LANG_gl=38,
|
||||
LANG_hr=78,
|
||||
LANG_hu=36,
|
||||
LANG_it=39,
|
||||
LANG_la=99, // custom number
|
||||
LANG_lv=101, // custom number
|
||||
LANG_nl=31,
|
||||
LANG_pl=48,
|
||||
LANG_pt=03,
|
||||
LANG_ru=07,
|
||||
LANG_sv=50,
|
||||
LANG_tr=90,
|
||||
LANG_uk=80,
|
||||
LANG_xx=999
|
||||
LANG_ar = 96,
|
||||
LANG_az = 100, // custom number
|
||||
LANG_bg = 41,
|
||||
LANG_ca = 37,
|
||||
LANG_cs = 42,
|
||||
LANG_da = 45,
|
||||
LANG_de = 49,
|
||||
LANG_el = 30,
|
||||
LANG_en = 01,
|
||||
LANG_es = 34,
|
||||
LANG_eu = 10,
|
||||
LANG_fr = 02,
|
||||
LANG_gl = 38,
|
||||
LANG_hr = 78,
|
||||
LANG_hu = 36,
|
||||
LANG_it = 39,
|
||||
LANG_la = 99, // custom number
|
||||
LANG_lv = 101, // custom number
|
||||
LANG_nl = 31,
|
||||
LANG_pl = 48,
|
||||
LANG_pt = 03,
|
||||
LANG_ru = 07,
|
||||
LANG_sv = 50,
|
||||
LANG_tr = 90,
|
||||
LANG_uk = 80,
|
||||
LANG_xx = 999
|
||||
};
|
||||
|
||||
#endif
|
||||
|
@ -4,11 +4,10 @@
|
||||
# License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
||||
|
||||
SOURCES += [
|
||||
UNIFIED_SOURCES += [
|
||||
'affentry.cxx',
|
||||
'affixmgr.cxx',
|
||||
'csutil.cxx',
|
||||
'dictmgr.cxx',
|
||||
'filemgr.cxx',
|
||||
'hashmgr.cxx',
|
||||
'hunspell.cxx',
|
||||
|
@ -1,24 +0,0 @@
|
||||
Bug 675553 - Switch from PRBool to bool.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -5517,17 +5517,17 @@ struct cs_info * get_current_cs(const ch
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
||||
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
|
||||
for (unsigned int i = 0; i <= 0xff; ++i) {
|
||||
- PRBool success = PR_FALSE;
|
||||
+ bool success = false;
|
||||
// We want to find the upper/lowercase equivalents of each byte
|
||||
// in this 1-byte character encoding. Call our encoding/decoding
|
||||
// APIs separately for each byte since they may reject some of the
|
||||
// bytes, and we want to handle errors separately for each byte.
|
||||
char lower, upper;
|
||||
do {
|
||||
if (i == 0)
|
||||
break;
|
@ -1,24 +0,0 @@
|
||||
Bug 690892 - Replace PR_TRUE/PR_FALSE with true/false.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -5549,17 +5549,17 @@ struct cs_info * get_current_cs(const ch
|
||||
|
||||
uniCased = ToUpperCase(uni);
|
||||
rv = encoder->Convert(&uniCased, &uniLength, &upper, &charLength);
|
||||
// Explicitly check NS_OK because we don't want to allow
|
||||
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
|
||||
if (rv != NS_OK || charLength != 1 || uniLength != 1)
|
||||
break;
|
||||
|
||||
- success = PR_TRUE;
|
||||
+ success = true;
|
||||
} while (0);
|
||||
|
||||
if (success) {
|
||||
ccs[i].cupper = upper;
|
||||
ccs[i].clower = lower;
|
||||
} else {
|
||||
ccs[i].cupper = i;
|
||||
ccs[i].clower = i;
|
@ -1,25 +0,0 @@
|
||||
Silence clang's warning about an empty while-loop body.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/affentry.cxx b/extensions/spellcheck/hunspell/src/affentry.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/affentry.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/affentry.cxx
|
||||
@@ -571,17 +571,18 @@ inline int SfxEntry::test_condition(cons
|
||||
while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
st--;
|
||||
}
|
||||
if (p && *p != ']') p = nextchar(p);
|
||||
} else if (pos) {
|
||||
if (neg) return 0;
|
||||
else if (i == numconds) return 1;
|
||||
ingroup = true;
|
||||
- while (p && *p != ']' && ((p = nextchar(p)) != NULL));
|
||||
+ while (p && *p != ']' && ((p = nextchar(p)) != NULL))
|
||||
+ ;
|
||||
// if (p && *p != ']') p = nextchar(p);
|
||||
st--;
|
||||
}
|
||||
if (!pos) {
|
||||
i++;
|
||||
st--;
|
||||
}
|
||||
if (st < beg && p && *p != ']') return 0; // word <= condition
|
@ -1,24 +0,0 @@
|
||||
Bug 777292 - Change nsnull to nullptr.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -5507,17 +5507,17 @@ struct cs_info * get_current_cs(const ch
|
||||
nsresult rv;
|
||||
nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
|
||||
rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
- encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nsnull, '?');
|
||||
+ encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
|
||||
rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
||||
|
||||
if (NS_FAILED(rv))
|
||||
return ccs;
|
||||
|
@ -1,24 +0,0 @@
|
||||
Bug 579517 - Convert NSPR numeric types to stdint types.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -5528,17 +5528,17 @@ struct cs_info * get_current_cs(const ch
|
||||
// APIs separately for each byte since they may reject some of the
|
||||
// bytes, and we want to handle errors separately for each byte.
|
||||
char lower, upper;
|
||||
do {
|
||||
if (i == 0)
|
||||
break;
|
||||
const char source = char(i);
|
||||
PRUnichar uni, uniCased;
|
||||
- PRInt32 charLength = 1, uniLength = 1;
|
||||
+ int32_t charLength = 1, uniLength = 1;
|
||||
|
||||
rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
|
||||
// Explicitly check NS_OK because we don't want to allow
|
||||
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
|
||||
if (rv != NS_OK || charLength != 1 || uniLength != 1)
|
||||
break;
|
||||
uniCased = ToLowerCase(uni);
|
||||
rv = encoder->Convert(&uniCased, &uniLength, &lower, &charLength);
|
@ -1,43 +0,0 @@
|
||||
Bug 784776 - Don't assume NULL is numeric in Hunspell code.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/affentry.hxx b/extensions/spellcheck/hunspell/src/affentry.hxx
|
||||
--- a/extensions/spellcheck/hunspell/src/affentry.hxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/affentry.hxx
|
||||
@@ -26,17 +26,17 @@ public:
|
||||
|
||||
PfxEntry(AffixMgr* pmgr, affentry* dp );
|
||||
~PfxEntry();
|
||||
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry * checkword(const char * word, int len, char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
- struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = NULL);
|
||||
+ struct hentry * check_twosfx(const char * word, int len, char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_morph(const char * word, int len, char in_compound,
|
||||
const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_twosfx_morph(const char * word, int len,
|
||||
char in_compound, const FLAG needflag = FLAG_NULL);
|
||||
|
||||
inline FLAG getFlag() { return aflag; }
|
||||
@@ -93,17 +93,17 @@ public:
|
||||
~SfxEntry();
|
||||
|
||||
inline bool allowCross() { return ((opts & aeXPRODUCT) != 0); }
|
||||
struct hentry * checkword(const char * word, int len, int optflags,
|
||||
PfxEntry* ppfx, char ** wlst, int maxSug, int * ns,
|
||||
// const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, char in_compound=IN_CPD_NOT);
|
||||
const FLAG cclass = FLAG_NULL, const FLAG needflag = FLAG_NULL, const FLAG badflag = 0);
|
||||
|
||||
- struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = NULL);
|
||||
+ struct hentry * check_twosfx(const char * word, int len, int optflags, PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
|
||||
char * check_twosfx_morph(const char * word, int len, int optflags,
|
||||
PfxEntry* ppfx, const FLAG needflag = FLAG_NULL);
|
||||
struct hentry * get_next_homonym(struct hentry * he);
|
||||
struct hentry * get_next_homonym(struct hentry * word, int optflags, PfxEntry* ppfx,
|
||||
const FLAG cclass, const FLAG needflag);
|
||||
|
||||
|
@ -1,62 +0,0 @@
|
||||
Bug 927728 - Replace PRUnichar with char16_t.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -5527,17 +5527,17 @@ struct cs_info * get_current_cs(const ch
|
||||
// in this 1-byte character encoding. Call our encoding/decoding
|
||||
// APIs separately for each byte since they may reject some of the
|
||||
// bytes, and we want to handle errors separately for each byte.
|
||||
char lower, upper;
|
||||
do {
|
||||
if (i == 0)
|
||||
break;
|
||||
const char source = char(i);
|
||||
- PRUnichar uni, uniCased;
|
||||
+ char16_t uni, uniCased;
|
||||
int32_t charLength = 1, uniLength = 1;
|
||||
|
||||
rv = decoder->Convert(&source, &charLength, &uni, &uniLength);
|
||||
// Explicitly check NS_OK because we don't want to allow
|
||||
// NS_OK_UDEC_MOREOUTPUT or NS_OK_UDEC_MOREINPUT.
|
||||
if (rv != NS_OK || charLength != 1 || uniLength != 1)
|
||||
break;
|
||||
uniCased = ToLowerCase(uni);
|
||||
@@ -5680,17 +5680,17 @@ unsigned short unicodetoupper(unsigned s
|
||||
// There are a dotless lower case i pair of upper `I',
|
||||
// and an upper I with dot pair of lower `i'.
|
||||
if (c == 0x0069 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||
return 0x0130;
|
||||
#ifdef OPENOFFICEORG
|
||||
return static_cast<unsigned short>(u_toupper(c));
|
||||
#else
|
||||
#ifdef MOZILLA_CLIENT
|
||||
- return ToUpperCase((PRUnichar) c);
|
||||
+ return ToUpperCase((char16_t) c);
|
||||
#else
|
||||
return (utf_tbl) ? utf_tbl[c].cupper : c;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
unsigned short unicodetolower(unsigned short c, int langnum)
|
||||
{
|
||||
@@ -5698,17 +5698,17 @@ unsigned short unicodetolower(unsigned s
|
||||
// There are a dotless lower case i pair of upper `I',
|
||||
// and an upper I with dot pair of lower `i'.
|
||||
if (c == 0x0049 && ((langnum == LANG_az) || (langnum == LANG_tr)))
|
||||
return 0x0131;
|
||||
#ifdef OPENOFFICEORG
|
||||
return static_cast<unsigned short>(u_tolower(c));
|
||||
#else
|
||||
#ifdef MOZILLA_CLIENT
|
||||
- return ToLowerCase((PRUnichar) c);
|
||||
+ return ToLowerCase((char16_t) c);
|
||||
#else
|
||||
return (utf_tbl) ? utf_tbl[c].clower : c;
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
int unicodeisalpha(unsigned short c)
|
||||
{
|
@ -1,71 +0,0 @@
|
||||
Bug 943268 - Remove nsCharsetAlias and nsCharsetConverterManager.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/csutil.cxx b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/csutil.cxx
|
||||
@@ -28,23 +28,22 @@ struct unicode_info {
|
||||
# ifndef MOZILLA_CLIENT
|
||||
# include "utf_info.cxx"
|
||||
# define UTF_LST_LEN (sizeof(utf_lst) / (sizeof(unicode_info)))
|
||||
# endif
|
||||
#endif
|
||||
|
||||
#ifdef MOZILLA_CLIENT
|
||||
#include "nsCOMPtr.h"
|
||||
-#include "nsServiceManagerUtils.h"
|
||||
#include "nsIUnicodeEncoder.h"
|
||||
#include "nsIUnicodeDecoder.h"
|
||||
#include "nsUnicharUtils.h"
|
||||
-#include "nsICharsetConverterManager.h"
|
||||
+#include "mozilla/dom/EncodingUtils.h"
|
||||
|
||||
-static NS_DEFINE_CID(kCharsetConverterManagerCID, NS_ICHARSETCONVERTERMANAGER_CID);
|
||||
+using mozilla::dom::EncodingUtils;
|
||||
#endif
|
||||
|
||||
struct unicode_info2 {
|
||||
char cletter;
|
||||
unsigned short cupper;
|
||||
unsigned short clower;
|
||||
};
|
||||
|
||||
@@ -5500,32 +5499,27 @@ struct cs_info * get_current_cs(const ch
|
||||
ccs[i].cupper = i;
|
||||
}
|
||||
|
||||
|
||||
nsCOMPtr<nsIUnicodeEncoder> encoder;
|
||||
nsCOMPtr<nsIUnicodeDecoder> decoder;
|
||||
|
||||
nsresult rv;
|
||||
- nsCOMPtr<nsICharsetConverterManager> ccm = do_GetService(kCharsetConverterManagerCID, &rv);
|
||||
- if (NS_FAILED(rv))
|
||||
+
|
||||
+ nsAutoCString label(es);
|
||||
+ nsAutoCString encoding;
|
||||
+ if (!EncodingUtils::FindEncodingForLabelNoReplacement(label, encoding)) {
|
||||
return ccs;
|
||||
-
|
||||
- rv = ccm->GetUnicodeEncoder(es, getter_AddRefs(encoder));
|
||||
- if (NS_FAILED(rv))
|
||||
- return ccs;
|
||||
+ }
|
||||
+ encoder = EncodingUtils::EncoderForEncoding(encoding);
|
||||
+ decoder = EncodingUtils::DecoderForEncoding(encoding);
|
||||
encoder->SetOutputErrorBehavior(encoder->kOnError_Signal, nullptr, '?');
|
||||
- rv = ccm->GetUnicodeDecoder(es, getter_AddRefs(decoder));
|
||||
- if (NS_FAILED(rv))
|
||||
- return ccs;
|
||||
decoder->SetInputErrorBehavior(decoder->kOnError_Signal);
|
||||
|
||||
- if (NS_FAILED(rv))
|
||||
- return ccs;
|
||||
-
|
||||
for (unsigned int i = 0; i <= 0xff; ++i) {
|
||||
bool success = false;
|
||||
// We want to find the upper/lowercase equivalents of each byte
|
||||
// in this 1-byte character encoding. Call our encoding/decoding
|
||||
// APIs separately for each byte since they may reject some of the
|
||||
// bytes, and we want to handle errors separately for each byte.
|
||||
char lower, upper;
|
||||
do {
|
@ -1,18 +0,0 @@
|
||||
Don't include config.h in license.hunspell if MOZILLA_CLIENT is set.
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/license.hunspell b/extensions/spellcheck/hunspell/src/license.hunspell
|
||||
--- a/extensions/spellcheck/hunspell/src/license.hunspell
|
||||
+++ b/extensions/spellcheck/hunspell/src/license.hunspell
|
||||
@@ -51,9 +51,11 @@
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
-#include "config.h"
|
||||
+#ifndef MOZILLA_CLIENT
|
||||
+# include "config.h"
|
||||
+#endif
|
@ -1,78 +0,0 @@
|
||||
Bug 983817 - Pad heap allocations passed to flag_qsort() on x86 Linux to work around gcc bug affecting Ubuntu packages. r=froydnj
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/hashmgr.cxx b/extensions/spellcheck/hunspell/src/hashmgr.cxx
|
||||
index 12adf42..95ff23f 100644
|
||||
--- a/extensions/spellcheck/hunspell/src/hashmgr.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/hashmgr.cxx
|
||||
@@ -11,6 +11,20 @@
|
||||
#include "csutil.hxx"
|
||||
#include "atypes.hxx"
|
||||
|
||||
+// The gcc used to build 32-bit builds of Firefox on Ubuntu
|
||||
+// miscompiles flag_qsort, using a 32-bit read instead of a 16-bit
|
||||
+// read while quicksorting an array of 16-bit units. This causes
|
||||
+// one of the top Firefox crashes.
|
||||
+// Given that I haven't been able to produce a reduced testcase to give
|
||||
+// to gcc developers, just work around the bug by allocating an extra 2
|
||||
+// bytes on the heap arrays passed to flag_qsort().
|
||||
+// See https://bugzilla.mozilla.org/show_bug.cgi?id=983817 .
|
||||
+#if defined(__linux__) && defined(__i386__) && defined(__GNUC__)
|
||||
+#define EXTRA_QSORT_ALLOC_SIZE 1
|
||||
+#else
|
||||
+#define EXTRA_QSORT_ALLOC_SIZE 0
|
||||
+#endif
|
||||
+
|
||||
// build a hash table from a munched word list
|
||||
|
||||
HashMgr::HashMgr(const char * tpath, const char * apath, const char * key)
|
||||
@@ -265,8 +279,8 @@ int HashMgr::remove(const char * word)
|
||||
struct hentry * dp = lookup(word);
|
||||
while (dp) {
|
||||
if (dp->alen == 0 || !TESTAFF(dp->astr, forbiddenword, dp->alen)) {
|
||||
- unsigned short * flags =
|
||||
- (unsigned short *) malloc(sizeof(short) * (dp->alen + 1));
|
||||
+ unsigned short * flags = (unsigned short *)
|
||||
+ malloc(sizeof(short) * (dp->alen + 1 + EXTRA_QSORT_ALLOC_SIZE));
|
||||
if (!flags) return 1;
|
||||
for (int i = 0; i < dp->alen; i++) flags[i] = dp->astr[i];
|
||||
flags[dp->alen] = forbiddenword;
|
||||
@@ -508,7 +522,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
|
||||
len = strlen(flags);
|
||||
if (len%2 == 1) HUNSPELL_WARNING(stderr, "error: line %d: bad flagvector\n", af->getlinenum());
|
||||
len /= 2;
|
||||
- *result = (unsigned short *) malloc(len * sizeof(short));
|
||||
+ *result = (unsigned short *)
|
||||
+ malloc((len + EXTRA_QSORT_ALLOC_SIZE) * sizeof(short));
|
||||
if (!*result) return -1;
|
||||
for (int i = 0; i < len; i++) {
|
||||
(*result)[i] = (((unsigned short) flags[i * 2]) << 8) + (unsigned short) flags[i * 2 + 1];
|
||||
@@ -524,7 +539,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
|
||||
for (p = flags; *p; p++) {
|
||||
if (*p == ',') len++;
|
||||
}
|
||||
- *result = (unsigned short *) malloc(len * sizeof(short));
|
||||
+ *result = (unsigned short *)
|
||||
+ malloc((len + EXTRA_QSORT_ALLOC_SIZE) * sizeof(short));
|
||||
if (!*result) return -1;
|
||||
dest = *result;
|
||||
for (p = flags; *p; p++) {
|
||||
@@ -548,7 +564,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
|
||||
case FLAG_UNI: { // UTF-8 characters
|
||||
w_char w[BUFSIZE/2];
|
||||
len = u8_u16(w, BUFSIZE/2, flags);
|
||||
- *result = (unsigned short *) malloc(len * sizeof(short));
|
||||
+ *result =
|
||||
+ (unsigned short *) malloc((len + EXTRA_QSORT_ALLOC_SIZE) * sizeof(short));
|
||||
if (!*result) return -1;
|
||||
memcpy(*result, w, len * sizeof(short));
|
||||
break;
|
||||
@@ -556,7 +573,8 @@ int HashMgr::decode_flags(unsigned short ** result, char * flags, FileMgr * af)
|
||||
default: { // Ispell's one-character flags (erfg -> e r f g)
|
||||
unsigned short * dest;
|
||||
len = strlen(flags);
|
||||
- *result = (unsigned short *) malloc(len * sizeof(short));
|
||||
+ *result = (unsigned short *)
|
||||
+ malloc((len + EXTRA_QSORT_ALLOC_SIZE) * sizeof(short));
|
||||
if (!*result) return -1;
|
||||
dest = *result;
|
||||
for (unsigned char * p = (unsigned char *) flags; *p; p++) {
|
@ -1,28 +0,0 @@
|
||||
Bug 318040 - Accept abbreviated words without dots to workaround incomplete tokenization. r=ehsan
|
||||
|
||||
diff --git a/extensions/spellcheck/hunspell/src/hunspell.cxx b/extensions/spellcheck/hunspell/src/hunspell.cxx
|
||||
--- a/extensions/spellcheck/hunspell/src/hunspell.cxx
|
||||
+++ b/extensions/spellcheck/hunspell/src/hunspell.cxx
|
||||
@@ -347,16 +347,22 @@ int Hunspell::spell(const char * word, i
|
||||
int abbv = 0;
|
||||
int wl = 0;
|
||||
|
||||
// input conversion
|
||||
RepList * rl = (pAMgr) ? pAMgr->get_iconvtable() : NULL;
|
||||
if (rl && rl->conv(word, wspace)) wl = cleanword2(cw, wspace, unicw, &nc, &captype, &abbv);
|
||||
else wl = cleanword2(cw, word, unicw, &nc, &captype, &abbv);
|
||||
|
||||
+#ifdef MOZILLA_CLIENT
|
||||
+ // accept the abbreviated words without dots
|
||||
+ // workaround for the incomplete tokenization of Mozilla
|
||||
+ abbv = 1;
|
||||
+#endif
|
||||
+
|
||||
if (wl == 0 || maxdic == 0) return 1;
|
||||
if (root) *root = NULL;
|
||||
|
||||
// allow numbers with dots, dashes and commas (but forbid double separators: "..", "--" etc.)
|
||||
enum { NBEGIN, NNUM, NSEP };
|
||||
int nstate = NBEGIN;
|
||||
int i;
|
||||
|
@ -4,12 +4,12 @@
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1 as published by the Free Software Foundation;
|
||||
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; If not, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
@ -21,273 +21,254 @@
|
||||
transformations out of c't 25/1999
|
||||
|
||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
||||
Porting from Aspell to Hunspell using C-like structs
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdio.h>
|
||||
#include <ctype.h>
|
||||
|
||||
#include "csutil.hxx"
|
||||
#include "phonet.hxx"
|
||||
|
||||
void init_phonet_hash(phonetable & parms)
|
||||
{
|
||||
int i, k;
|
||||
void init_phonet_hash(phonetable& parms) {
|
||||
int i, k;
|
||||
|
||||
for (i = 0; i < HASHSIZE; i++) {
|
||||
parms.hash[i] = -1;
|
||||
}
|
||||
for (i = 0; i < HASHSIZE; i++) {
|
||||
parms.hash[i] = -1;
|
||||
}
|
||||
|
||||
for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
|
||||
/** set hash value **/
|
||||
k = (unsigned char) parms.rules[i][0];
|
||||
for (i = 0; parms.rules[i][0] != '\0'; i += 2) {
|
||||
/** set hash value **/
|
||||
k = (unsigned char)parms.rules[i][0];
|
||||
|
||||
if (parms.hash[k] < 0) {
|
||||
parms.hash[k] = i;
|
||||
}
|
||||
if (parms.hash[k] < 0) {
|
||||
parms.hash[k] = i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// like strcpy but safe if the strings overlap
|
||||
// but only if dest < src
|
||||
static inline void strmove(char * dest, char * src) {
|
||||
while (*src)
|
||||
static inline void strmove(char* dest, char* src) {
|
||||
while (*src)
|
||||
*dest++ = *src++;
|
||||
*dest = '\0';
|
||||
}
|
||||
|
||||
static int myisalpha(char ch) {
|
||||
if ((unsigned char) ch < 128) return isalpha(ch);
|
||||
if ((unsigned char)ch < 128)
|
||||
return isalpha(ch);
|
||||
return 1;
|
||||
}
|
||||
|
||||
/* Do phonetic transformation. */
|
||||
/* phonetic transcription algorithm */
|
||||
/* see: http://aspell.net/man-html/Phonetic-Code.html */
|
||||
/* convert string to uppercase before this call */
|
||||
int phonet (const char * inword, char * target,
|
||||
int len,
|
||||
phonetable & parms)
|
||||
{
|
||||
/** Do phonetic transformation. **/
|
||||
/** "len" = length of "inword" incl. '\0'. **/
|
||||
std::string phonet(const std::string& inword, phonetable& parms) {
|
||||
|
||||
/** result: >= 0: length of "target" **/
|
||||
/** otherwise: error **/
|
||||
int i, k = 0, p, z;
|
||||
int k0, n0, p0 = -333, z0;
|
||||
char c;
|
||||
const char* s;
|
||||
typedef unsigned char uchar;
|
||||
|
||||
int i,j,k=0,n,p,z;
|
||||
int k0,n0,p0=-333,z0;
|
||||
char c, c0;
|
||||
const char * s;
|
||||
typedef unsigned char uchar;
|
||||
char word[MAXPHONETUTF8LEN + 1];
|
||||
if (len == -1) len = strlen(inword);
|
||||
if (len > MAXPHONETUTF8LEN) return 0;
|
||||
strncpy(word, inword, MAXPHONETUTF8LEN);
|
||||
word[MAXPHONETUTF8LEN] = '\0';
|
||||
|
||||
/** check word **/
|
||||
i = j = z = 0;
|
||||
while ((c = word[i]) != '\0') {
|
||||
n = parms.hash[(uchar) c];
|
||||
z0 = 0;
|
||||
size_t len = inword.size();
|
||||
if (len > MAXPHONETUTF8LEN)
|
||||
return std::string();
|
||||
char word[MAXPHONETUTF8LEN + 1];
|
||||
strncpy(word, inword.c_str(), MAXPHONETUTF8LEN);
|
||||
word[MAXPHONETUTF8LEN] = '\0';
|
||||
|
||||
if (n >= 0) {
|
||||
/** check all rules for the same letter **/
|
||||
while (parms.rules[n][0] == c) {
|
||||
std::string target;
|
||||
/** check word **/
|
||||
i = z = 0;
|
||||
while ((c = word[i]) != '\0') {
|
||||
int n = parms.hash[(uchar)c];
|
||||
z0 = 0;
|
||||
|
||||
/** check whole string **/
|
||||
k = 1; /** number of found letters **/
|
||||
p = 5; /** default priority **/
|
||||
s = parms.rules[n];
|
||||
s++; /** important for (see below) "*(s-1)" **/
|
||||
|
||||
while (*s != '\0' && word[i+k] == *s
|
||||
&& !isdigit ((unsigned char) *s) && strchr ("(-<^$", *s) == NULL) {
|
||||
if (n >= 0) {
|
||||
/** check all rules for the same letter **/
|
||||
while (parms.rules[n][0] == c) {
|
||||
/** check whole string **/
|
||||
k = 1; /** number of found letters **/
|
||||
p = 5; /** default priority **/
|
||||
s = parms.rules[n];
|
||||
s++; /** important for (see below) "*(s-1)" **/
|
||||
|
||||
while (*s != '\0' && word[i + k] == *s && !isdigit((unsigned char)*s) &&
|
||||
strchr("(-<^$", *s) == NULL) {
|
||||
k++;
|
||||
s++;
|
||||
}
|
||||
if (*s == '(') {
|
||||
/** check letters in "(..)" **/
|
||||
if (myisalpha(word[i + k]) // ...could be implied?
|
||||
&& strchr(s + 1, word[i + k]) != NULL) {
|
||||
k++;
|
||||
s++;
|
||||
}
|
||||
if (*s == '(') {
|
||||
/** check letters in "(..)" **/
|
||||
if (myisalpha(word[i+k]) // ...could be implied?
|
||||
&& strchr(s+1, word[i+k]) != NULL) {
|
||||
k++;
|
||||
while (*s != ')')
|
||||
s++;
|
||||
while (*s != ')')
|
||||
s++;
|
||||
}
|
||||
}
|
||||
p0 = (int) *s;
|
||||
k0 = k;
|
||||
while (*s == '-' && k > 1) {
|
||||
k--;
|
||||
s++;
|
||||
}
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit ((unsigned char) *s)) {
|
||||
/** determine priority **/
|
||||
p = *s - '0';
|
||||
s++;
|
||||
}
|
||||
if (*s == '^' && *(s+1) == '^')
|
||||
s++;
|
||||
}
|
||||
p0 = (int)*s;
|
||||
k0 = k;
|
||||
while (*s == '-' && k > 1) {
|
||||
k--;
|
||||
s++;
|
||||
}
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit((unsigned char)*s)) {
|
||||
/** determine priority **/
|
||||
p = *s - '0';
|
||||
s++;
|
||||
}
|
||||
if (*s == '^' && *(s + 1) == '^')
|
||||
s++;
|
||||
|
||||
if (*s == '\0'
|
||||
|| (*s == '^'
|
||||
&& (i == 0 || ! myisalpha(word[i-1]))
|
||||
&& (*(s+1) != '$'
|
||||
|| (! myisalpha(word[i+k0]) )))
|
||||
|| (*s == '$' && i > 0
|
||||
&& myisalpha(word[i-1])
|
||||
&& (! myisalpha(word[i+k0]) )))
|
||||
{
|
||||
/** search for followup rules, if: **/
|
||||
/** parms.followup and k > 1 and NO '-' in searchstring **/
|
||||
c0 = word[i+k-1];
|
||||
n0 = parms.hash[(uchar) c0];
|
||||
if (*s == '\0' || (*s == '^' && (i == 0 || !myisalpha(word[i - 1])) &&
|
||||
(*(s + 1) != '$' || (!myisalpha(word[i + k0])))) ||
|
||||
(*s == '$' && i > 0 && myisalpha(word[i - 1]) &&
|
||||
(!myisalpha(word[i + k0])))) {
|
||||
/** search for followup rules, if: **/
|
||||
/** parms.followup and k > 1 and NO '-' in searchstring **/
|
||||
char c0 = word[i + k - 1];
|
||||
n0 = parms.hash[(uchar)c0];
|
||||
|
||||
// if (parms.followup && k > 1 && n0 >= 0
|
||||
if (k > 1 && n0 >= 0
|
||||
&& p0 != (int) '-' && word[i+k] != '\0') {
|
||||
/** test follow-up rule for "word[i+k]" **/
|
||||
while (parms.rules[n0][0] == c0) {
|
||||
|
||||
/** check whole string **/
|
||||
k0 = k;
|
||||
p0 = 5;
|
||||
s = parms.rules[n0];
|
||||
s++;
|
||||
while (*s != '\0' && word[i+k0] == *s
|
||||
&& ! isdigit((unsigned char) *s) && strchr("(-<^$",*s) == NULL) {
|
||||
k0++;
|
||||
s++;
|
||||
}
|
||||
if (*s == '(') {
|
||||
/** check letters **/
|
||||
if (myisalpha(word[i+k0])
|
||||
&& strchr (s+1, word[i+k0]) != NULL) {
|
||||
k0++;
|
||||
while (*s != ')' && *s != '\0')
|
||||
s++;
|
||||
if (*s == ')')
|
||||
s++;
|
||||
}
|
||||
}
|
||||
while (*s == '-') {
|
||||
/** "k0" gets NOT reduced **/
|
||||
/** because "if (k0 == k)" **/
|
||||
s++;
|
||||
}
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit ((unsigned char) *s)) {
|
||||
p0 = *s - '0';
|
||||
s++;
|
||||
}
|
||||
|
||||
if (*s == '\0'
|
||||
/** *s == '^' cuts **/
|
||||
|| (*s == '$' && ! myisalpha(word[i+k0])))
|
||||
{
|
||||
if (k0 == k) {
|
||||
/** this is just a piece of the string **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (p0 < p) {
|
||||
/** priority too low **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
/** rule fits; stop search **/
|
||||
break;
|
||||
}
|
||||
n0 += 2;
|
||||
} /** End of "while (parms.rules[n0][0] == c0)" **/
|
||||
|
||||
if (p0 >= p && parms.rules[n0][0] == c0) {
|
||||
n += 2;
|
||||
continue;
|
||||
}
|
||||
} /** end of follow-up stuff **/
|
||||
|
||||
/** replace string **/
|
||||
s = parms.rules[n+1];
|
||||
p0 = (parms.rules[n][0] != '\0'
|
||||
&& strchr (parms.rules[n]+1,'<') != NULL) ? 1:0;
|
||||
if (p0 == 1 && z == 0) {
|
||||
/** rule with '<' is used **/
|
||||
if (j > 0 && *s != '\0'
|
||||
&& (target[j-1] == c || target[j-1] == *s)) {
|
||||
j--;
|
||||
}
|
||||
z0 = 1;
|
||||
z = 1;
|
||||
k0 = 0;
|
||||
while (*s != '\0' && word[i+k0] != '\0') {
|
||||
word[i+k0] = *s;
|
||||
// if (parms.followup && k > 1 && n0 >= 0
|
||||
if (k > 1 && n0 >= 0 && p0 != (int)'-' && word[i + k] != '\0') {
|
||||
/** test follow-up rule for "word[i+k]" **/
|
||||
while (parms.rules[n0][0] == c0) {
|
||||
/** check whole string **/
|
||||
k0 = k;
|
||||
p0 = 5;
|
||||
s = parms.rules[n0];
|
||||
s++;
|
||||
while (*s != '\0' && word[i + k0] == *s &&
|
||||
!isdigit((unsigned char)*s) &&
|
||||
strchr("(-<^$", *s) == NULL) {
|
||||
k0++;
|
||||
s++;
|
||||
}
|
||||
if (k > k0)
|
||||
strmove (&word[0]+i+k0, &word[0]+i+k);
|
||||
|
||||
/** new "actual letter" **/
|
||||
c = word[i];
|
||||
}
|
||||
else { /** no '<' rule used **/
|
||||
i += k - 1;
|
||||
z = 0;
|
||||
while (*s != '\0'
|
||||
&& *(s+1) != '\0' && j < len) {
|
||||
if (j == 0 || target[j-1] != *s) {
|
||||
target[j] = *s;
|
||||
j++;
|
||||
if (*s == '(') {
|
||||
/** check letters **/
|
||||
if (myisalpha(word[i + k0]) &&
|
||||
strchr(s + 1, word[i + k0]) != NULL) {
|
||||
k0++;
|
||||
while (*s != ')' && *s != '\0')
|
||||
s++;
|
||||
if (*s == ')')
|
||||
s++;
|
||||
}
|
||||
}
|
||||
while (*s == '-') {
|
||||
/** "k0" gets NOT reduced **/
|
||||
/** because "if (k0 == k)" **/
|
||||
s++;
|
||||
}
|
||||
/** new "actual letter" **/
|
||||
c = *s;
|
||||
if (parms.rules[n][0] != '\0'
|
||||
&& strstr (parms.rules[n]+1, "^^") != NULL) {
|
||||
if (c != '\0') {
|
||||
target[j] = c;
|
||||
j++;
|
||||
}
|
||||
strmove (&word[0], &word[0]+i+1);
|
||||
i = 0;
|
||||
z0 = 1;
|
||||
if (*s == '<')
|
||||
s++;
|
||||
if (isdigit((unsigned char)*s)) {
|
||||
p0 = *s - '0';
|
||||
s++;
|
||||
}
|
||||
|
||||
if (*s == '\0'
|
||||
/** *s == '^' cuts **/
|
||||
|| (*s == '$' && !myisalpha(word[i + k0]))) {
|
||||
if (k0 == k) {
|
||||
/** this is just a piece of the string **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (p0 < p) {
|
||||
/** priority too low **/
|
||||
n0 += 2;
|
||||
continue;
|
||||
}
|
||||
/** rule fits; stop search **/
|
||||
break;
|
||||
}
|
||||
n0 += 2;
|
||||
} /** End of "while (parms.rules[n0][0] == c0)" **/
|
||||
|
||||
if (p0 >= p && parms.rules[n0][0] == c0) {
|
||||
n += 2;
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
} /** end of follow-up stuff **/
|
||||
n += 2;
|
||||
} /** end of while (parms.rules[n][0] == c) **/
|
||||
} /** end of if (n >= 0) **/
|
||||
if (z0 == 0) {
|
||||
// if (k && (assert(p0!=-333),!p0) && j < len && c != '\0'
|
||||
// && (!parms.collapse_result || j == 0 || target[j-1] != c)){
|
||||
if (k && !p0 && j < len && c != '\0'
|
||||
&& (1 || j == 0 || target[j-1] != c)){
|
||||
/** condense only double letters **/
|
||||
target[j] = c;
|
||||
///printf("\n setting \n");
|
||||
j++;
|
||||
}
|
||||
} /** end of follow-up stuff **/
|
||||
|
||||
i++;
|
||||
z = 0;
|
||||
k=0;
|
||||
/** replace string **/
|
||||
s = parms.rules[n + 1];
|
||||
p0 = (parms.rules[n][0] != '\0' &&
|
||||
strchr(parms.rules[n] + 1, '<') != NULL)
|
||||
? 1
|
||||
: 0;
|
||||
if (p0 == 1 && z == 0) {
|
||||
/** rule with '<' is used **/
|
||||
if (!target.empty() && *s != '\0' &&
|
||||
(target[target.size()-1] == c || target[target.size()-1] == *s)) {
|
||||
target.erase(target.size() - 1);
|
||||
}
|
||||
z0 = 1;
|
||||
z = 1;
|
||||
k0 = 0;
|
||||
while (*s != '\0' && word[i + k0] != '\0') {
|
||||
word[i + k0] = *s;
|
||||
k0++;
|
||||
s++;
|
||||
}
|
||||
if (k > k0)
|
||||
strmove(&word[0] + i + k0, &word[0] + i + k);
|
||||
|
||||
/** new "actual letter" **/
|
||||
c = word[i];
|
||||
} else { /** no '<' rule used **/
|
||||
i += k - 1;
|
||||
z = 0;
|
||||
while (*s != '\0' && *(s + 1) != '\0' && target.size() < len) {
|
||||
if (target.empty() || target[target.size()-1] != *s) {
|
||||
target.push_back(*s);
|
||||
}
|
||||
s++;
|
||||
}
|
||||
/** new "actual letter" **/
|
||||
c = *s;
|
||||
if (parms.rules[n][0] != '\0' &&
|
||||
strstr(parms.rules[n] + 1, "^^") != NULL) {
|
||||
if (c != '\0') {
|
||||
target.push_back(c);
|
||||
}
|
||||
strmove(&word[0], &word[0] + i + 1);
|
||||
i = 0;
|
||||
z0 = 1;
|
||||
}
|
||||
}
|
||||
break;
|
||||
} /** end of follow-up stuff **/
|
||||
n += 2;
|
||||
} /** end of while (parms.rules[n][0] == c) **/
|
||||
} /** end of if (n >= 0) **/
|
||||
if (z0 == 0) {
|
||||
if (k && !p0 && target.size() < len && c != '\0' &&
|
||||
(1 || target.empty() || target[target.size()-1] != c)) {
|
||||
/** condense only double letters **/
|
||||
target.push_back(c);
|
||||
/// printf("\n setting \n");
|
||||
}
|
||||
} /** end of while ((c = word[i]) != '\0') **/
|
||||
|
||||
target[j] = '\0';
|
||||
return (j);
|
||||
i++;
|
||||
z = 0;
|
||||
k = 0;
|
||||
}
|
||||
} /** end of while ((c = word[i]) != '\0') **/
|
||||
|
||||
} /** end of function "phonet" **/
|
||||
return target;
|
||||
} /** end of function "phonet" **/
|
||||
|
@ -4,12 +4,12 @@
|
||||
This library is free software; you can redistribute it and/or
|
||||
modify it under the terms of the GNU Lesser General Public
|
||||
License version 2.1 as published by the Free Software Foundation;
|
||||
|
||||
|
||||
This library is distributed in the hope that it will be useful,
|
||||
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
Lesser General Public License for more details.
|
||||
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public
|
||||
License along with this library; If not, see
|
||||
<http://www.gnu.org/licenses/>.
|
||||
@ -21,8 +21,8 @@
|
||||
transformations out of c't 25/1999
|
||||
|
||||
2007-07-26 Bjoern Jacke <bjoern at j3e.de>
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
Released under MPL/GPL/LGPL tri-license for Hunspell
|
||||
|
||||
2007-08-23 Laszlo Nemeth <nemeth at OOo>
|
||||
Porting from Aspell to Hunspell using C-like structs
|
||||
*/
|
||||
@ -30,23 +30,23 @@
|
||||
#ifndef __PHONETHXX__
|
||||
#define __PHONETHXX__
|
||||
|
||||
#define HASHSIZE 256
|
||||
#define MAXPHONETLEN 256
|
||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
||||
#define HASHSIZE 256
|
||||
#define MAXPHONETLEN 256
|
||||
#define MAXPHONETUTF8LEN (MAXPHONETLEN * 4)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
struct phonetable {
|
||||
char utf8;
|
||||
cs_info * lang;
|
||||
cs_info* lang;
|
||||
int num;
|
||||
char * * rules;
|
||||
char** rules;
|
||||
int hash[HASHSIZE];
|
||||
};
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable & parms);
|
||||
LIBHUNSPELL_DLL_EXPORTED void init_phonet_hash(phonetable& parms);
|
||||
|
||||
LIBHUNSPELL_DLL_EXPORTED int phonet (const char * inword, char * target,
|
||||
int len, phonetable & phone);
|
||||
LIBHUNSPELL_DLL_EXPORTED std::string phonet(const std::string& inword,
|
||||
phonetable& phone);
|
||||
|
||||
#endif
|
||||
|
@ -1,87 +1,193 @@
|
||||
#include "license.hunspell"
|
||||
#include "license.myspell"
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <limits>
|
||||
|
||||
#include "replist.hxx"
|
||||
#include "csutil.hxx"
|
||||
|
||||
RepList::RepList(int n) {
|
||||
dat = (replentry **) malloc(sizeof(replentry *) * n);
|
||||
if (dat == 0) size = 0; else size = n;
|
||||
pos = 0;
|
||||
dat = (replentry**)malloc(sizeof(replentry*) * n);
|
||||
if (dat == 0)
|
||||
size = 0;
|
||||
else
|
||||
size = n;
|
||||
pos = 0;
|
||||
}
|
||||
|
||||
RepList::~RepList()
|
||||
{
|
||||
for (int i = 0; i < pos; i++) {
|
||||
free(dat[i]->pattern);
|
||||
free(dat[i]->pattern2);
|
||||
free(dat[i]);
|
||||
}
|
||||
free(dat);
|
||||
RepList::~RepList() {
|
||||
for (int i = 0; i < pos; i++) {
|
||||
free(dat[i]->pattern);
|
||||
free(dat[i]->pattern2);
|
||||
free(dat[i]);
|
||||
}
|
||||
free(dat);
|
||||
}
|
||||
|
||||
int RepList::get_pos() {
|
||||
return pos;
|
||||
return pos;
|
||||
}
|
||||
|
||||
replentry * RepList::item(int n) {
|
||||
return dat[n];
|
||||
replentry* RepList::item(int n) {
|
||||
return dat[n];
|
||||
}
|
||||
|
||||
int RepList::near(const char * word) {
|
||||
int p1 = 0;
|
||||
int p2 = pos;
|
||||
while ((p2 - p1) > 1) {
|
||||
int m = (p1 + p2) / 2;
|
||||
int c = strcmp(word, dat[m]->pattern);
|
||||
if (c <= 0) {
|
||||
if (c < 0) p2 = m; else p1 = p2 = m;
|
||||
} else p1 = m;
|
||||
int RepList::near(const char* word) {
|
||||
int p1 = 0;
|
||||
int p2 = pos;
|
||||
while ((p2 - p1) > 1) {
|
||||
int m = (p1 + p2) / 2;
|
||||
int c = strcmp(word, dat[m]->pattern);
|
||||
if (c <= 0) {
|
||||
if (c < 0)
|
||||
p2 = m;
|
||||
else
|
||||
p1 = p2 = m;
|
||||
} else
|
||||
p1 = m;
|
||||
}
|
||||
return p1;
|
||||
}
|
||||
|
||||
int RepList::match(const char* word, int n) {
|
||||
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0)
|
||||
return strlen(dat[n]->pattern);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RepList::add(char* pat1, char* pat2) {
|
||||
if (pos >= size || pat1 == NULL || pat2 == NULL)
|
||||
return 1;
|
||||
replentry* r = (replentry*)malloc(sizeof(replentry));
|
||||
if (r == NULL)
|
||||
return 1;
|
||||
r->pattern = mystrrep(pat1, "_", " ");
|
||||
r->pattern2 = mystrrep(pat2, "_", " ");
|
||||
r->start = false;
|
||||
r->end = false;
|
||||
dat[pos++] = r;
|
||||
for (int i = pos - 1; i > 0; i--) {
|
||||
r = dat[i];
|
||||
if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
|
||||
dat[i] = dat[i - 1];
|
||||
dat[i - 1] = r;
|
||||
} else
|
||||
break;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RepList::conv(const char* word, char* dest, size_t destsize) {
|
||||
size_t stl = 0;
|
||||
int change = 0;
|
||||
for (size_t i = 0; i < strlen(word); i++) {
|
||||
int n = near(word + i);
|
||||
int l = match(word + i, n);
|
||||
if (l) {
|
||||
size_t replen = strlen(dat[n]->pattern2);
|
||||
if (stl + replen >= destsize)
|
||||
return -1;
|
||||
strcpy(dest + stl, dat[n]->pattern2);
|
||||
stl += replen;
|
||||
i += l - 1;
|
||||
change = 1;
|
||||
} else {
|
||||
if (stl + 1 >= destsize)
|
||||
return -1;
|
||||
dest[stl++] = word[i];
|
||||
}
|
||||
return p1;
|
||||
}
|
||||
dest[stl] = '\0';
|
||||
return change;
|
||||
}
|
||||
|
||||
int RepList::match(const char * word, int n) {
|
||||
if (strncmp(word, dat[n]->pattern, strlen(dat[n]->pattern)) == 0) return strlen(dat[n]->pattern);
|
||||
return 0;
|
||||
}
|
||||
bool RepList::conv(const char* word, std::string& dest) {
|
||||
dest.clear();
|
||||
|
||||
int RepList::add(char * pat1, char * pat2) {
|
||||
if (pos >= size || pat1 == NULL || pat2 == NULL) return 1;
|
||||
replentry * r = (replentry *) malloc(sizeof(replentry));
|
||||
if (r == NULL) return 1;
|
||||
r->pattern = mystrrep(pat1, "_", " ");
|
||||
r->pattern2 = mystrrep(pat2, "_", " ");
|
||||
r->start = false;
|
||||
r->end = false;
|
||||
dat[pos++] = r;
|
||||
for (int i = pos - 1; i > 0; i--) {
|
||||
r = dat[i];
|
||||
if (strcmp(r->pattern, dat[i - 1]->pattern) < 0) {
|
||||
dat[i] = dat[i - 1];
|
||||
dat[i - 1] = r;
|
||||
} else break;
|
||||
bool change = false;
|
||||
for (size_t i = 0; i < strlen(word); i++) {
|
||||
int n = near(word + i);
|
||||
int l = match(word + i, n);
|
||||
if (l) {
|
||||
dest.append(dat[n]->pattern2);
|
||||
i += l - 1;
|
||||
change = true;
|
||||
} else {
|
||||
dest.push_back(word[i]);
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int RepList::conv(const char * word, char * dest) {
|
||||
int stl = 0;
|
||||
int change = 0;
|
||||
for (size_t i = 0; i < strlen(word); i++) {
|
||||
int n = near(word + i);
|
||||
int l = match(word + i, n);
|
||||
if (l) {
|
||||
strcpy(dest + stl, dat[n]->pattern2);
|
||||
stl += strlen(dat[n]->pattern2);
|
||||
i += l - 1;
|
||||
change = 1;
|
||||
} else dest[stl++] = word[i];
|
||||
}
|
||||
dest[stl] = '\0';
|
||||
return change;
|
||||
}
|
||||
return change;
|
||||
}
|
||||
|
@ -1,3 +1,76 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* string replacement list class */
|
||||
#ifndef _REPLIST_HXX_
|
||||
#define _REPLIST_HXX_
|
||||
@ -6,25 +79,29 @@
|
||||
|
||||
#include "w_char.hxx"
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED RepList
|
||||
{
|
||||
private:
|
||||
RepList(const RepList&);
|
||||
RepList& operator = (const RepList&);
|
||||
protected:
|
||||
replentry ** dat;
|
||||
int size;
|
||||
int pos;
|
||||
#include <string>
|
||||
#include <vector>
|
||||
|
||||
public:
|
||||
RepList(int n);
|
||||
~RepList();
|
||||
class LIBHUNSPELL_DLL_EXPORTED RepList {
|
||||
private:
|
||||
RepList(const RepList&);
|
||||
RepList& operator=(const RepList&);
|
||||
|
||||
int get_pos();
|
||||
int add(char * pat1, char * pat2);
|
||||
replentry * item(int n);
|
||||
int near(const char * word);
|
||||
int match(const char * word, int n);
|
||||
int conv(const char * word, char * dest);
|
||||
protected:
|
||||
replentry** dat;
|
||||
int size;
|
||||
int pos;
|
||||
|
||||
public:
|
||||
RepList(int n);
|
||||
~RepList();
|
||||
|
||||
int get_pos();
|
||||
int add(char* pat1, char* pat2);
|
||||
replentry* item(int n);
|
||||
int near(const char* word);
|
||||
int match(const char* word, int n);
|
||||
int conv(const char* word, char* dest, size_t destsize);
|
||||
bool conv(const char* word, std::string& dest);
|
||||
};
|
||||
#endif
|
||||
|
File diff suppressed because it is too large
Load Diff
@ -1,8 +1,79 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
/*
|
||||
* Copyright 2002 Kevin B. Hendricks, Stratford, Ontario, Canada
|
||||
* And Contributors. All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. All modifications to the source code must be clearly marked as
|
||||
* such. Binary redistributions based on modified source code
|
||||
* must be clearly marked as modified versions in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY KEVIN B. HENDRICKS AND CONTRIBUTORS
|
||||
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
|
||||
* FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
* KEVIN B. HENDRICKS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
|
||||
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
|
||||
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef _SUGGESTMGR_HXX_
|
||||
#define _SUGGESTMGR_HXX_
|
||||
|
||||
#define MAXSWL 100
|
||||
#define MAXSWUTF8L (MAXSWL * 4)
|
||||
#define MAX_ROOTS 100
|
||||
#define MAX_WORDS 100
|
||||
#define MAX_GUESS 200
|
||||
@ -15,10 +86,10 @@
|
||||
#define MINTIMER 100
|
||||
#define MAXPLUSTIMER 100
|
||||
|
||||
#define NGRAM_LONGER_WORSE (1 << 0)
|
||||
#define NGRAM_ANY_MISMATCH (1 << 1)
|
||||
#define NGRAM_LOWERING (1 << 2)
|
||||
#define NGRAM_WEIGHTED (1 << 3)
|
||||
#define NGRAM_LONGER_WORSE (1 << 0)
|
||||
#define NGRAM_ANY_MISMATCH (1 << 1)
|
||||
#define NGRAM_LOWERING (1 << 2)
|
||||
#define NGRAM_WEIGHTED (1 << 3)
|
||||
|
||||
#include "hunvisapi.h"
|
||||
|
||||
@ -30,86 +101,98 @@
|
||||
|
||||
enum { LCS_UP, LCS_LEFT, LCS_UPLEFT };
|
||||
|
||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr
|
||||
{
|
||||
private:
|
||||
class LIBHUNSPELL_DLL_EXPORTED SuggestMgr {
|
||||
private:
|
||||
SuggestMgr(const SuggestMgr&);
|
||||
SuggestMgr& operator = (const SuggestMgr&);
|
||||
private:
|
||||
char * ckey;
|
||||
int ckeyl;
|
||||
w_char * ckey_utf;
|
||||
SuggestMgr& operator=(const SuggestMgr&);
|
||||
|
||||
char * ctry;
|
||||
int ctryl;
|
||||
w_char * ctry_utf;
|
||||
private:
|
||||
char* ckey;
|
||||
int ckeyl;
|
||||
w_char* ckey_utf;
|
||||
|
||||
AffixMgr* pAMgr;
|
||||
int maxSug;
|
||||
struct cs_info * csconv;
|
||||
int utf8;
|
||||
int langnum;
|
||||
int nosplitsugs;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int complexprefixes;
|
||||
char* ctry;
|
||||
int ctryl;
|
||||
w_char* ctry_utf;
|
||||
|
||||
AffixMgr* pAMgr;
|
||||
int maxSug;
|
||||
struct cs_info* csconv;
|
||||
int utf8;
|
||||
int langnum;
|
||||
int nosplitsugs;
|
||||
int maxngramsugs;
|
||||
int maxcpdsugs;
|
||||
int complexprefixes;
|
||||
|
||||
public:
|
||||
SuggestMgr(const char * tryme, int maxn, AffixMgr *aptr);
|
||||
public:
|
||||
SuggestMgr(const char* tryme, int maxn, AffixMgr* aptr);
|
||||
~SuggestMgr();
|
||||
|
||||
int suggest(char*** slst, const char * word, int nsug, int * onlycmpdsug);
|
||||
int ngsuggest(char ** wlst, char * word, int ns, HashMgr** pHMgr, int md);
|
||||
int suggest_auto(char*** slst, const char * word, int nsug);
|
||||
int suggest_stems(char*** slst, const char * word, int nsug);
|
||||
int suggest_pos_stems(char*** slst, const char * word, int nsug);
|
||||
int suggest(char*** slst, const char* word, int nsug, int* onlycmpdsug);
|
||||
int ngsuggest(char** wlst, const char* word, int ns, HashMgr** pHMgr, int md);
|
||||
int suggest_auto(char*** slst, const char* word, int nsug);
|
||||
int suggest_stems(char*** slst, const char* word, int nsug);
|
||||
int suggest_pos_stems(char*** slst, const char* word, int nsug);
|
||||
|
||||
char * suggest_morph(const char * word);
|
||||
char * suggest_gen(char ** pl, int pln, char * pattern);
|
||||
char * suggest_morph_for_spelling_error(const char * word);
|
||||
char* suggest_morph(const char* word);
|
||||
char* suggest_gen(char** pl, int pln, const char* pattern);
|
||||
char* suggest_morph_for_spelling_error(const char* word);
|
||||
|
||||
private:
|
||||
int testsug(char** wlst, const char * candidate, int wl, int ns, int cpdsuggest,
|
||||
int * timer, clock_t * timelimit);
|
||||
int checkword(const char *, int, int, int *, clock_t *);
|
||||
int check_forbidden(const char *, int);
|
||||
private:
|
||||
int testsug(char** wlst,
|
||||
const char* candidate,
|
||||
int wl,
|
||||
int ns,
|
||||
int cpdsuggest,
|
||||
int* timer,
|
||||
clock_t* timelimit);
|
||||
int checkword(const char*, int, int, int*, clock_t*);
|
||||
int check_forbidden(const char*, int);
|
||||
|
||||
int capchars(char **, const char *, int, int);
|
||||
int replchars(char**, const char *, int, int);
|
||||
int doubletwochars(char**, const char *, int, int);
|
||||
int forgotchar(char **, const char *, int, int);
|
||||
int swapchar(char **, const char *, int, int);
|
||||
int longswapchar(char **, const char *, int, int);
|
||||
int movechar(char **, const char *, int, int);
|
||||
int extrachar(char **, const char *, int, int);
|
||||
int badcharkey(char **, const char *, int, int);
|
||||
int badchar(char **, const char *, int, int);
|
||||
int twowords(char **, const char *, int, int);
|
||||
int fixstems(char **, const char *, int);
|
||||
int capchars(char**, const char*, int, int);
|
||||
int replchars(char**, const char*, int, int);
|
||||
int doubletwochars(char**, const char*, int, int);
|
||||
int forgotchar(char**, const char*, int, int);
|
||||
int swapchar(char**, const char*, int, int);
|
||||
int longswapchar(char**, const char*, int, int);
|
||||
int movechar(char**, const char*, int, int);
|
||||
int extrachar(char**, const char*, int, int);
|
||||
int badcharkey(char**, const char*, int, int);
|
||||
int badchar(char**, const char*, int, int);
|
||||
int twowords(char**, const char*, int, int);
|
||||
int fixstems(char**, const char*, int);
|
||||
|
||||
int capchars_utf(char **, const w_char *, int wl, int, int);
|
||||
int doubletwochars_utf(char**, const w_char *, int wl, int, int);
|
||||
int forgotchar_utf(char**, const w_char *, int wl, int, int);
|
||||
int extrachar_utf(char**, const w_char *, int wl, int, int);
|
||||
int badcharkey_utf(char **, const w_char *, int wl, int, int);
|
||||
int badchar_utf(char **, const w_char *, int wl, int, int);
|
||||
int swapchar_utf(char **, const w_char *, int wl, int, int);
|
||||
int longswapchar_utf(char **, const w_char *, int, int, int);
|
||||
int movechar_utf(char **, const w_char *, int, int, int);
|
||||
|
||||
int mapchars(char**, const char *, int, int);
|
||||
int map_related(const char *, char *, int, int, char ** wlst, int, int, const mapentry*, int, int *, clock_t *);
|
||||
int ngram(int n, char * s1, const char * s2, int opt);
|
||||
int mystrlen(const char * word);
|
||||
int leftcommonsubstring(char * s1, const char * s2);
|
||||
int commoncharacterpositions(char * s1, const char * s2, int * is_swap);
|
||||
void bubblesort( char ** rwd, char ** rwd2, int * rsc, int n);
|
||||
void lcs(const char * s, const char * s2, int * l1, int * l2, char ** result);
|
||||
int lcslen(const char * s, const char* s2);
|
||||
char * suggest_hentry_gen(hentry * rv, char * pattern);
|
||||
int capchars_utf(char**, const w_char*, int wl, int, int);
|
||||
int doubletwochars_utf(char**, const w_char*, int wl, int, int);
|
||||
int forgotchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int extrachar_utf(char**, const w_char*, int wl, int, int);
|
||||
int badcharkey_utf(char**, const w_char*, int wl, int, int);
|
||||
int badchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int swapchar_utf(char**, const w_char*, int wl, int, int);
|
||||
int longswapchar_utf(char**, const w_char*, int, int, int);
|
||||
int movechar_utf(char**, const w_char*, int, int, int);
|
||||
|
||||
int mapchars(char**, const char*, int, int);
|
||||
int map_related(const char*,
|
||||
std::string&,
|
||||
int,
|
||||
char** wlst,
|
||||
int,
|
||||
int,
|
||||
const mapentry*,
|
||||
int,
|
||||
int*,
|
||||
clock_t*);
|
||||
int ngram(int n, const std::string& s1, const std::string& s2, int opt);
|
||||
int mystrlen(const char* word);
|
||||
int leftcommonsubstring(const char* s1, const char* s2);
|
||||
int commoncharacterpositions(const char* s1, const char* s2, int* is_swap);
|
||||
void bubblesort(char** rwd, char** rwd2, int* rsc, int n);
|
||||
void lcs(const char* s, const char* s2, int* l1, int* l2, char** result);
|
||||
int lcslen(const char* s, const char* s2);
|
||||
int lcslen(const std::string& s, const std::string& s2);
|
||||
char* suggest_hentry_gen(hentry* rv, const char* pattern);
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -1,19 +1,73 @@
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Mozilla Public License Version
|
||||
* 1.1 (the "License"); you may not use this file except in compliance with
|
||||
* the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/MPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is Hunspell, based on MySpell.
|
||||
*
|
||||
* The Initial Developers of the Original Code are
|
||||
* Kevin Hendricks (MySpell) and Németh László (Hunspell).
|
||||
* Portions created by the Initial Developers are Copyright (C) 2002-2005
|
||||
* the Initial Developers. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s): David Einstein, Davide Prina, Giuseppe Modugno,
|
||||
* Gianluca Turconi, Simon Brouwer, Noll János, Bíró Árpád,
|
||||
* Goldman Eleonóra, Sarlós Tamás, Bencsáth Boldizsár, Halácsy Péter,
|
||||
* Dvornik László, Gefferth András, Nagy Viktor, Varga Dániel, Chris Halls,
|
||||
* Rene Engelhard, Bram Moolenaar, Dafydd Jones, Harri Pitkänen
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the MPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the MPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef __WCHARHXX__
|
||||
#define __WCHARHXX__
|
||||
|
||||
#ifndef GCC
|
||||
typedef struct {
|
||||
struct w_char {
|
||||
#else
|
||||
typedef struct __attribute__ ((packed)) {
|
||||
struct __attribute__((packed)) w_char {
|
||||
#endif
|
||||
unsigned char l;
|
||||
unsigned char h;
|
||||
} w_char;
|
||||
unsigned char l;
|
||||
unsigned char h;
|
||||
|
||||
// Orders two w_char values by their 16-bit code, where h is the high byte and
// l the low byte: compare the high bytes first and fall back to the low bytes
// when they are equal.
friend bool operator<(const w_char a, const w_char b) {
  if (a.h != b.h)
    return a.h < b.h;
  return a.l < b.l;
}
|
||||
|
||||
// Two w_char values are equal when both their low and high bytes agree.
friend bool operator==(const w_char a, const w_char b) {
  if (a.l != b.l)
    return false;
  return a.h == b.h;
}
|
||||
|
||||
// Inequality, defined as the negation of operator== on the same operands.
friend bool operator!=(const w_char a, const w_char b) {
  return !(a == b);
}
|
||||
};
|
||||
|
||||
// two character arrays
|
||||
struct replentry {
|
||||
char * pattern;
|
||||
char * pattern2;
|
||||
char* pattern;
|
||||
char* pattern2;
|
||||
bool start;
|
||||
bool end;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user