mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-11-08 12:37:37 +00:00
ea0646f211
Replace the core bayesian junk mail algorithm with a chi-squared probability distribution modeled after spam bayes and Gary Robinson's work. Change the model for how we count tokens across messages. Token counts get out of alignment when re-training against already classified messages. Revamp the junk mail tokenizer. Make it a hdr sink listener and add custom tokens for attachment information. Ignore tokens larger than 13 characters. Tokenize purely off of white space. Ignore tokens less than 3 bytes in length. There is still a lot more work to be done to the tokenizer. Many thanks to Miguel Varga for working out the initial core algorithm improvement and to all of the folks at spam bayes and of course Gary Robinson for helping to make this happen.
155 lines
7.0 KiB
Plaintext
155 lines
7.0 KiB
Plaintext
/* -*- Mode: IDL; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* ***** BEGIN LICENSE BLOCK *****
|
|
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
|
|
*
|
|
* The contents of this file are subject to the Mozilla Public License Version
|
|
* 1.1 (the "License"); you may not use this file except in compliance with
|
|
* the License. You may obtain a copy of the License at
|
|
* http://www.mozilla.org/MPL/
|
|
*
|
|
* Software distributed under the License is distributed on an "AS IS" basis,
|
|
* WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
|
|
* for the specific language governing rights and limitations under the
|
|
* License.
|
|
*
|
|
* The Original Code is mozilla.org code.
|
|
*
|
|
* The Initial Developer of the Original Code is
|
|
* Netscape Communications Corporation.
|
|
* Portions created by the Initial Developer are Copyright (C) 1998
|
|
* the Initial Developer. All Rights Reserved.
|
|
*
|
|
* Contributor(s):
|
|
*
|
|
* Alternatively, the contents of this file may be used under the terms of
|
|
* either of the GNU General Public License Version 2 or later (the "GPL"),
|
|
* or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
|
|
* in which case the provisions of the GPL or the LGPL are applicable instead
|
|
* of those above. If you wish to allow use of your version of this file only
|
|
* under the terms of either the GPL or the LGPL, and not to allow others to
|
|
* use your version of this file under the terms of the MPL, indicate your
|
|
* decision by deleting the provisions above and replace them with the notice
|
|
* and other provisions required by the GPL or the LGPL. If you do not delete
|
|
* the provisions above, a recipient may use your version of this file under
|
|
* the terms of any one of the MPL, the GPL or the LGPL.
|
|
*
|
|
* ***** END LICENSE BLOCK ***** */
|
|
|
|
#include "nsISupports.idl"
|
|
#include "nsIURL.idl"
|
|
|
|
interface nsIFileSpec;
|
|
interface nsIUrlListener;
|
|
interface nsIMsgStatusFeedback;
|
|
interface nsIMsgIncomingServer;
|
|
interface nsIMsgWindow;
|
|
interface nsILoadGroup;
|
|
interface nsIMsgSearchSession;
|
|
interface nsICacheEntryDescriptor;
|
|
interface nsICacheSession;
|
|
interface nsIMimeHeaders;
|
|
interface nsIStreamListener;
|
|
interface nsIMsgFolder;
|
|
interface nsIMsgHeaderSink;
|
|
|
|
// nsIMsgMailNewsUrl extends nsIURL with url-listener registration, a
// running/not-running url state, and the server, folder, window, mime header,
// search session and cache objects declared below.
[scriptable, uuid(6CFFCEB0-CB8C-11d2-8065-006008128C4E)]
|
|
interface nsIMsgMailNewsUrl : nsIURL {
|
|
///////////////////////////////////////////////////////////////////////////////
|
|
// Eventually we'd like to push this type of functionality up into nsIURI.
|
|
// The idea is to allow the "application" (the part of the code which wants to
|
|
// run a url in order to perform some action) to register itself as a listener
|
|
// on url. As a url listener, the app will be informed when the url begins to run
|
|
// and when the url is finished.
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
// Registers aUrlListener as a listener on this url.
void RegisterListener (in nsIUrlListener aUrlListener);
|
|
// Counterpart to RegisterListener; presumably removes a previously
// registered listener -- confirm against the implementation.
void UnRegisterListener (in nsIUrlListener aUrlListener);
|
|
|
|
// Read-only nsIURI. NOTE(review): how the base URI is derived from this url
// is not specified in this file.
readonly attribute nsIURI baseURI;
|
|
// Eventually this will be removed and replaced with calls to nsIMsgStatusFeedback
|
|
attribute string errorMessage;
|
|
|
|
// if you really want to know what the current state of the url is (running or not
|
|
// running) you should look into becoming a urlListener...
|
|
// runningUrl: whether the url is running. aStatusCode: nsresult supplied with
// the state change (presumably the completion status -- TODO confirm).
void SetUrlState(in boolean runningUrl, in nsresult aStatusCode);
|
|
// Returns the running/not-running state through the out parameter.
void GetUrlState(out boolean runningUrl);
|
|
|
|
// Incoming server for this url (read-only) and the folder associated with it.
readonly attribute nsIMsgIncomingServer server;
|
|
attribute nsIMsgFolder folder;
|
|
|
|
// the ownership model for msg feedback
|
|
attribute nsIMsgStatusFeedback statusFeedback;
|
|
|
|
// Message window associated with this url; per the comments below, the load
// group and (typically) the header sink are tied to it.
attribute nsIMsgWindow msgWindow;
|
|
|
|
// current mime headers if reading message
|
|
attribute nsIMimeHeaders mimeHeaders;
|
|
|
|
// the load group is computed from the msgWindow
|
|
readonly attribute nsILoadGroup loadGroup;
|
|
|
|
// search session, if we're running a search.
|
|
attribute nsIMsgSearchSession searchSession;
|
|
attribute boolean updatingFolder;
|
|
attribute boolean addToMemoryCache;
|
|
attribute boolean msgIsInLocalCache;
|
|
attribute boolean suppressErrorMsgs; // used to avoid displaying biff error messages
|
|
|
|
// Memory cache entry for this url. NOTE(review): presumably related to
// addToMemoryCache above -- confirm.
attribute nsICacheEntryDescriptor memCacheEntry;
|
|
// if we want mime to cache images fetched by this message, set this cache session.
|
|
attribute nsICacheSession imageCacheSession;
|
|
// hold onto this cache entry until url goes away. This is used by mime to keep
|
|
// images cached until the url is deleted so we don't get entries evicted
|
|
// before we've used them. We need to be sure this doesn't introduce
|
|
// circular references or the url will never get freed.
|
|
void cacheCacheEntry(in nsICacheEntryDescriptor cacheEntry);
|
|
// Counterpart to cacheCacheEntry; presumably releases an entry held by it.
void removeCacheEntry(in nsICacheEntryDescriptor cacheEntry);
|
|
// Url type constants. NOTE(review): presumably the values accepted by the
// 'type' argument of IsUrlType below -- confirm against implementations.
const unsigned long eCopy = 0;
|
|
const unsigned long eMove = 1;
|
|
const unsigned long eDisplay = 2;
|
|
// Returns a boolean for the given type value.
boolean IsUrlType(in unsigned long type);
|
|
// Returns an nsIStreamListener for aFileSpec. NOTE(review): addDummyEnvelope
// presumably controls whether a dummy envelope is written to the saved
// message -- confirm.
nsIStreamListener getSaveAsListener(in boolean addDummyEnvelope, in nsIFileSpec aFileSpec);
|
|
|
|
// typically the header sink is tied to the nsIMsgWindow, but in certain circumstances, a consumer
|
|
// may choose to provide its own header sink for this url
|
|
attribute nsIMsgHeaderSink msgHeaderSink;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
// This is a very small interface which I'm grouping with the mailnewsUrl interface.
|
|
// Several url types (mailbox, imap, nntp) have similar properties because they can
|
|
// represent mail messages. For instance, these urls can have URI
|
|
// equivalents which represent a message.
|
|
// We want to provide the app the ability to get the URI for the
|
|
// url. This URI to URL mapping doesn't exist for all mailnews urls...hence I'm
|
|
// grouping it into a separate interface...
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// nsIMsgMessageUrl: per-message properties for urls that can represent a mail
// message -- the message URI, the file used for save-to-disk, and related flags.
[scriptable, uuid(02338DD2-E7B9-11d2-8070-006008128C4E)]
|
|
interface nsIMsgMessageUrl : nsISupports {
|
|
// get and set the RDF URI associated with the url. Note, not all urls have
|
|
// had uri's set on them so be prepared to handle cases where this string is empty.
|
|
attribute string uri;
|
|
// used by imap, pop and nntp in order to implement save message to disk
|
|
attribute nsIFileSpec messageFile;
|
|
// NOTE(review): capitalised unlike the other attributes in this interface;
// renaming it would change the interface, so the spelling is left as-is.
// Presumably controls whether a dummy envelope is added when saving -- confirm.
attribute boolean AddDummyEnvelope;
|
|
// NOTE(review): presumably selects canonical line endings when the message
// is written out -- confirm against implementations.
attribute boolean canonicalLineEnding;
|
|
// NOTE(review): presumably the spec the url was originally created with -- confirm.
attribute string originalSpec;
|
|
};
|
|
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
// This is a very small interface which I'm grouping with the mailnewsUrl interface.
|
|
// I want to isolate out all the I18N specific information that may be associated with
|
|
// any given mailnews url. This gives I18N their own "sandbox" of routines they can add
|
|
// and tweak as they see fit. For now it contains mostly charset information.
|
|
//////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// nsIMsgI18NUrl: charset information associated with a mailnews url -- the
// folder's charset (read-only) and a per-message charset override.
[scriptable, uuid(D71E0785-2862-11d4-98C1-001083010E9B)]
|
|
interface nsIMsgI18NUrl : nsISupports {
|
|
// the charset associated with a folder for this url.....
|
|
readonly attribute string folderCharset;
|
|
// NOTE(review): presumably true when the folder's charset should override
// the charset declared by the message -- confirm.
readonly attribute boolean folderCharsetOverride;
|
|
// the charsetOverRide is a charset the user may have specified via the menu for
|
|
// a particular message
|
|
attribute string charsetOverRide;
|
|
};
|