mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-28 23:31:56 +00:00
bug 176528, need a nsISemanticUnitScanner to support intl spam mail filter
Implement a new interface for this purpose. win&linux, p=ftang, r=shanjian, sr=beard mac build, p=nhotta, r=ftang, sr=beard
This commit is contained in:
parent
729bf8f659
commit
3b0054ca0b
@ -815,6 +815,7 @@ sub BuildClientDist()
|
||||
|
||||
#LWBRK
|
||||
InstallFromManifest(":mozilla:intl:lwbrk:public:MANIFEST", "$distdirectory:lwbrk:");
|
||||
InstallFromManifest(":mozilla:intl:lwbrk:idl:MANIFEST_IDL", "$distdirectory:idl:");
|
||||
|
||||
#STRRES
|
||||
InstallFromManifest(":mozilla:intl:strres:public:MANIFEST_IDL", "$distdirectory:idl:");
|
||||
@ -1507,6 +1508,7 @@ sub BuildIDLProjects()
|
||||
BuildIDLProject(":mozilla:intl:unicharutil:macbuild:unicharutilIDL.xml", "unicharutil");
|
||||
BuildIDLProject(":mozilla:intl:uconv:macbuild:uconvIDL.xml", "uconv");
|
||||
BuildIDLProject(":mozilla:intl:chardet:macbuild:chardetIDL.xml", "chardet");
|
||||
BuildIDLProject(":mozilla:intl:lwbrk:macbuild:lwbrkIDL.xml", "lwbrk");
|
||||
|
||||
if ($main::options{iiextras})
|
||||
{
|
||||
|
@ -46,6 +46,7 @@
|
||||
|
||||
// lwbrk
|
||||
#include "nsLWBrkConstructors.h"
|
||||
#include "nsSemanticUnitScanner.h"
|
||||
|
||||
// unicharutil
|
||||
#include "nsUcharUtilConstructors.h"
|
||||
@ -56,6 +57,9 @@
|
||||
// locale
|
||||
#include "nsLocaleConstructors.h"
|
||||
|
||||
|
||||
NS_GENERIC_FACTORY_CONSTRUCTOR(nsSemanticUnitScanner);
|
||||
|
||||
static NS_METHOD
|
||||
AddCategoryEntry(const char* category,
|
||||
const char* key,
|
||||
@ -290,6 +294,8 @@ static nsModuleComponentInfo components[] =
|
||||
// lwbrk
|
||||
{ "Line and Word Breaker", NS_LWBRK_CID,
|
||||
NS_LWBRK_CONTRACTID, nsLWBreakerFImpConstructor},
|
||||
{ "Semantic Unit Scanner", NS_SEMANTICUNITSCANNER_CID,
|
||||
NS_SEMANTICUNITSCANNER_CONTRACTID, nsSemanticUnitScannerConstructor},
|
||||
|
||||
// unicharutil
|
||||
{ "Unichar Utility", NS_UNICHARUTIL_CID,
|
||||
|
@ -26,7 +26,7 @@ VPATH = @srcdir@
|
||||
|
||||
include $(DEPTH)/config/autoconf.mk
|
||||
|
||||
DIRS = public src
|
||||
DIRS = idl public src
|
||||
|
||||
ifdef ENABLE_TESTS
|
||||
DIRS += tests
|
||||
|
@ -42,6 +42,7 @@ CPPSRCS = \
|
||||
nsJISx4501LineBreaker.cpp \
|
||||
nsLWBreakerFImp.cpp \
|
||||
nsSampleWordBreaker.cpp \
|
||||
nsSemanticUnitScanner.cpp \
|
||||
$(NULL)
|
||||
|
||||
include $(topsrcdir)/config/rules.mk
|
||||
|
@ -74,18 +74,6 @@ nsresult nsSampleWordBreaker::BreakInBetween(
|
||||
}
|
||||
|
||||
|
||||
// hack
|
||||
typedef enum {
|
||||
kWbClassSpace = 0,
|
||||
kWbClassAlphaLetter,
|
||||
kWbClassPunct,
|
||||
kWbClassHanLetter,
|
||||
kWbClassKatakanaLetter,
|
||||
kWbClassHiraganaLetter,
|
||||
kWbClassHWKatakanaLetter,
|
||||
kWbClassThaiLetter
|
||||
} wb_class;
|
||||
|
||||
#define IS_ASCII(c) (0 == ( 0xFF80 & (c)))
|
||||
#define ASCII_IS_ALPHA(c) ((( 'a' <= (c)) && ((c) <= 'z')) || (( 'A' <= (c)) && ((c) <= 'Z')))
|
||||
#define ASCII_IS_DIGIT(c) (( '0' <= (c)) && ((c) <= '9'))
|
||||
|
@ -41,6 +41,17 @@
|
||||
|
||||
#include "nsIWordBreaker.h"
|
||||
|
||||
// Character-class codes for word breaking. Values of this enum are compared
// against the result of nsSampleWordBreaker::GetClass(); the enumerator names
// suggest one class per script/category (space, alphabetic, punctuation, Han,
// Katakana, Hiragana, half-width Katakana, Thai) -- presumably assigned by
// GetClass(), which is not shown here.
typedef enum {
|
||||
kWbClassSpace = 0,
|
||||
kWbClassAlphaLetter,
|
||||
kWbClassPunct,
|
||||
kWbClassHanLetter,
|
||||
kWbClassKatakanaLetter,
|
||||
kWbClassHiraganaLetter,
|
||||
kWbClassHWKatakanaLetter,
|
||||
kWbClassThaiLetter
|
||||
} wb_class;
|
||||
|
||||
class nsSampleWordBreaker : public nsIWordBreaker
|
||||
{
|
||||
NS_DECL_ISUPPORTS
|
||||
|
119
intl/lwbrk/src/nsSemanticUnitScanner.cpp
Normal file
119
intl/lwbrk/src/nsSemanticUnitScanner.cpp
Normal file
@ -0,0 +1,119 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.1 (the "License"); you may not use this file except in
|
||||
* compliance with the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the NPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the NPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#include "nsSemanticUnitScanner.h"
|
||||
#include "prmem.h"
|
||||
|
||||
NS_IMPL_ISUPPORTS1(nsSemanticUnitScanner, nsISemanticUnitScanner)
|
||||
|
||||
// Default constructor: default-constructs the nsSampleWordBreaker base and
// invokes NS_INIT_ISUPPORTS(). The scanner keeps no state of its own.
nsSemanticUnitScanner::nsSemanticUnitScanner() : nsSampleWordBreaker()
|
||||
{
|
||||
NS_INIT_ISUPPORTS();
|
||||
/* member initializers and constructor code */
|
||||
}
|
||||
|
||||
// Destructor: intentionally empty, the class owns no resources of its own.
nsSemanticUnitScanner::~nsSemanticUnitScanner()
|
||||
{
|
||||
/* destructor code */
|
||||
}
|
||||
|
||||
|
||||
/* void start (in string characterSet); */
|
||||
// Start: placeholder implementation. The characterSet argument is ignored
// and the call always returns NS_OK.
NS_IMETHODIMP nsSemanticUnitScanner::Start(const char *characterSet)
|
||||
{
|
||||
// do nothing for now.
|
||||
return NS_OK;
|
||||
}
|
||||
|
||||
/* boolean next (in wstring text, in long length, in long pos, in boolean isLastBuffer, out long begin, out long end); */
|
||||
/**
 * Finds the next semantic unit in |text| at or after offset |pos|.
 *
 * A Han letter is returned as a unit on its own. Runs classified as space or
 * punctuation are skipped. Anything else is returned as the span reported by
 * nsSampleWordBreaker::Next().
 *
 * @param text          buffer to scan
 * @param length        number of PRUnichar elements in |text|
 * @param pos           offset at which scanning starts; must not be negative
 * @param isLastBuffer  part of the interface, but not consulted by this
 *                      implementation
 * @param begin         [out] start offset of the unit; equals |*end| when no
 *                      unit is reported
 * @param end           [out] offset one past the last element of the unit
 * @param _retval       [out] PR_TRUE when a unit was found, PR_FALSE when the
 *                      end of the buffer was reached or the word breaker
 *                      asked for more text
 * @return NS_OK on success, NS_ERROR_NULL_POINTER for a null argument,
 *         NS_ERROR_ILLEGAL_VALUE for a negative |pos|, or the failure code
 *         returned by nsSampleWordBreaker::Next().
 */
NS_IMETHODIMP nsSemanticUnitScanner::Next(const PRUnichar *text, PRInt32 length, PRInt32 pos, PRBool isLastBuffer, PRInt32 *begin, PRInt32 *end, PRBool *_retval)
{
  // Every exit path writes through these pointers, so reject nulls up front.
  if (!begin || !end || !_retval)
    return NS_ERROR_NULL_POINTER;

  // A negative offset would index before the buffer and wrap to a huge value
  // when cast to PRUint32 for the word breaker.
  if (pos < 0)
    return NS_ERROR_ILLEGAL_VALUE;

  // The buffer is only dereferenced when pos < length.
  if (!text && pos < length)
    return NS_ERROR_NULL_POINTER;

  // Space and punctuation runs are skipped by looping rather than by calling
  // Next() recursively, so stack usage stays constant no matter how many such
  // runs the input contains.
  for (;;) {
    // if we reach the end, just return
    if (pos >= length) {
      *begin = pos;
      *end = pos;
      *_retval = PR_FALSE;
      return NS_OK;
    }

    PRUint8 char_class = nsSampleWordBreaker::GetClass(text[pos]);

    // if we are in Chinese mode, return one Han letter at a time
    // we should not do this if we are in Japanese or Korean mode
    if (kWbClassHanLetter == char_class) {
      *begin = pos;
      *end = pos + 1;
      *_retval = PR_TRUE;
      return NS_OK;
    }

    PRUint32 next;
    PRBool needMoreText;
    // find the next "word"
    nsresult res = nsSampleWordBreaker::Next(text, (PRUint32) length, (PRUint32) pos,
                                             &next, &needMoreText);

    NS_ASSERTION(NS_SUCCEEDED(res), "nsSampleWordBreaker::Next failed");
    if (NS_FAILED(res))
      return res;

    // if we don't have enough text to make a decision, return
    // NOTE(review): isLastBuffer is not consulted here, so a trailing unit is
    // never reported when the word breaker asks for more text -- confirm
    // whether callers expect the final unit when isLastBuffer is true.
    if (needMoreText) {
      *begin = pos;
      *end = pos;
      *_retval = PR_FALSE;
      return NS_OK;
    }

    // for anything other than space or punct, report the unit
    if ((char_class != kWbClassSpace) && (char_class != kWbClassPunct)) {
      *begin = pos;
      *end = (PRInt32) next;
      *_retval = PR_TRUE;
      return NS_OK;
    }

    // what we got is space or punct: skip it and look at the next break
    pos = (PRInt32) next;
  }
}
|
||||
|
58
intl/lwbrk/src/nsSemanticUnitScanner.h
Normal file
58
intl/lwbrk/src/nsSemanticUnitScanner.h
Normal file
@ -0,0 +1,58 @@
|
||||
/* -*- Mode: C++; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
||||
/* ***** BEGIN LICENSE BLOCK *****
|
||||
* Version: NPL 1.1/GPL 2.0/LGPL 2.1
|
||||
*
|
||||
* The contents of this file are subject to the Netscape Public License
|
||||
* Version 1.1 (the "License"); you may not use this file except in
|
||||
* compliance with the License. You may obtain a copy of the License at
|
||||
* http://www.mozilla.org/NPL/
|
||||
*
|
||||
* Software distributed under the License is distributed on an "AS IS" basis,
|
||||
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
||||
* for the specific language governing rights and limitations under the
|
||||
* License.
|
||||
*
|
||||
* The Original Code is mozilla.org code.
|
||||
*
|
||||
* The Initial Developer of the Original Code is
|
||||
* Netscape Communications Corporation.
|
||||
* Portions created by the Initial Developer are Copyright (C) 1998
|
||||
* the Initial Developer. All Rights Reserved.
|
||||
*
|
||||
* Contributor(s):
|
||||
*
|
||||
*
|
||||
* Alternatively, the contents of this file may be used under the terms of
|
||||
* either the GNU General Public License Version 2 or later (the "GPL"), or
|
||||
* the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
||||
* in which case the provisions of the GPL or the LGPL are applicable instead
|
||||
* of those above. If you wish to allow use of your version of this file only
|
||||
* under the terms of either the GPL or the LGPL, and not to allow others to
|
||||
* use your version of this file under the terms of the NPL, indicate your
|
||||
* decision by deleting the provisions above and replace them with the notice
|
||||
* and other provisions required by the GPL or the LGPL. If you do not delete
|
||||
* the provisions above, a recipient may use your version of this file under
|
||||
* the terms of any one of the NPL, the GPL or the LGPL.
|
||||
*
|
||||
* ***** END LICENSE BLOCK ***** */
|
||||
|
||||
#ifndef nsSemanticUnitScanner_h__
|
||||
#define nsSemanticUnitScanner_h__
|
||||
|
||||
#include "nsSampleWordBreaker.h"
|
||||
#include "nsISemanticUnitScanner.h"
|
||||
|
||||
|
||||
// Implementation class for the nsISemanticUnitScanner interface. It also
// derives from nsSampleWordBreaker, whose GetClass() and Next() are called by
// the scanner's implementation.
class nsSemanticUnitScanner : public nsISemanticUnitScanner
|
||||
, public nsSampleWordBreaker
|
||||
{
|
||||
public:
|
||||
NS_DECL_ISUPPORTS
|
||||
NS_DECL_NSISEMANTICUNITSCANNER
|
||||
|
||||
nsSemanticUnitScanner();
|
||||
virtual ~nsSemanticUnitScanner();
|
||||
/* additional members */
|
||||
};
|
||||
|
||||
#endif
|
@ -1243,6 +1243,13 @@
|
||||
<FILEKIND>Text</FILEKIND>
|
||||
<FILEFLAGS>Debug</FILEFLAGS>
|
||||
</FILE>
|
||||
<FILE>
|
||||
<PATHTYPE>Name</PATHTYPE>
|
||||
<PATH>nsSemanticUnitScanner.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
<FILEKIND>Text</FILEKIND>
|
||||
<FILEFLAGS>Debug</FILEFLAGS>
|
||||
</FILE>
|
||||
</FILELIST>
|
||||
<LINKORDER>
|
||||
<FILEREF>
|
||||
@ -1435,6 +1442,11 @@
|
||||
<PATH>nsEntityConverter.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
<FILEREF>
|
||||
<PATHTYPE>Name</PATHTYPE>
|
||||
<PATH>nsSemanticUnitScanner.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
</LINKORDER>
|
||||
</TARGET>
|
||||
<TARGET>
|
||||
@ -2627,6 +2639,13 @@
|
||||
<FILEKIND>Text</FILEKIND>
|
||||
<FILEFLAGS>Debug</FILEFLAGS>
|
||||
</FILE>
|
||||
<FILE>
|
||||
<PATHTYPE>Name</PATHTYPE>
|
||||
<PATH>nsSemanticUnitScanner.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
<FILEKIND>Text</FILEKIND>
|
||||
<FILEFLAGS>Debug</FILEFLAGS>
|
||||
</FILE>
|
||||
</FILELIST>
|
||||
<LINKORDER>
|
||||
<FILEREF>
|
||||
@ -2819,6 +2838,11 @@
|
||||
<PATH>nsEntityConverter.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
<FILEREF>
|
||||
<PATHTYPE>Name</PATHTYPE>
|
||||
<PATH>nsSemanticUnitScanner.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
</LINKORDER>
|
||||
</TARGET>
|
||||
</TARGETLIST>
|
||||
@ -3034,6 +3058,12 @@
|
||||
<PATH>rulebrk.c</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
<FILEREF>
|
||||
<TARGETNAME>i18n.shlb</TARGETNAME>
|
||||
<PATHTYPE>Name</PATHTYPE>
|
||||
<PATH>nsSemanticUnitScanner.cpp</PATH>
|
||||
<PATHFORMAT>MacOS</PATHFORMAT>
|
||||
</FILEREF>
|
||||
</GROUP>
|
||||
<GROUP><NAME>strres</NAME>
|
||||
<FILEREF>
|
||||
|
Loading…
Reference in New Issue
Block a user