gecko-dev/grendel/storage/addressparser/RFC822AddressParser.java
1999-11-02 01:51:54 +00:00

970 lines
23 KiB
Java

/* -*- Mode: java; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The contents of this file are subject to the Mozilla Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/MPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is the Grendel mail/news client.
*
* The Initial Developer of the Original Code is Netscape Communications
* Corporation. Portions created by Netscape are
* Copyright (C) 1997 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*
* Created: Eric Bina <ebina@netscape.com>, 30 Oct 1997.
*/
package grendel.storage.addressparser;
import java.io.*;
import java.util.*;
/*
* Parse a comma separated list of RFC822 addresses.
* The important thing here is all the ERROR cases.
* If you aren't rigidly RFC822 compliant, you will hit
* one of them.
*/
class RFC822AddressParser
{
static final int NO_ERROR = 0;
static final int LIST_OF_ADDRESSES = -1;
static final int NO_DOMAIN = -2;
static final int NO_VECTOR = -3;
static final int COMMENTS_IN_VECTOR = -4;
static final int EXTRA_TOKENS_AT_START = -5;
static final int NO_ADDR_SPEC = -6;
static final int NO_ROUTE_ADDR = -7;
static final int BAD_ROUTE = -8;
static final int BAD_ROUTE_DOMAIN = -9;
static final int EXTRA_TOKENS_IN_ROUTE_ADDR = -10;
static final int NO_PHRASE = -11;
static final int EXTRA_TOKENS_IN_MAILBOX_LIST = -12;
static final int NO_LOCAL_PART = -13;
static final int BAD_GROUP = -14;
static final int ILLEGAL_SPECIAL = -15;
private Vector address_list;
private Vector tokens;
private Vector all_tokens;
private int error_code;
/*
* This is the complete vector of tokens
* straight from the tokenizer.
*/
public RFC822AddressParser(Vector tokens) throws RFC822ParserException
{
AddressToken addr_token = null;
int num;
int start, end;
this.error_code = NO_ERROR;
this.all_tokens = tokens;
this.address_list = new Vector();
this.tokens = stripComments(tokens);
num = this.tokens.size();
if (num == 0)
{
// ERROR: zero length input vector.
this.error_code = NO_VECTOR;
return;
}
start = 0;
end = num - 1;
/*
* Arbitrary commas can start and end this list
*/
start = skipForwardCommas(start, end);
end = skipCommas(end);
if (start > end)
{
// ERROR: zero length input vector.
this.error_code = NO_VECTOR;
return;
}
addr_token = collectAddress(start, end);
if (addr_token == null)
{
return;
}
addMailAddress(addr_token);
end = addr_token.getTokenStart() - 1;
while (end >= start)
{
RFC822Token token;
token = (RFC822Token)this.tokens.elementAt(end);
/*
* Skip the comma separator if found, but
* don't require it.
*/
if (token.isSpecialChar(',') == true)
{
end = skipCommas(end);
}
addr_token = collectAddress(start, end);
if (addr_token == null)
{
return;
}
addMailAddress(addr_token);
end = addr_token.getTokenStart() - 1;
}
if (addr_token.getTokenStart() != start)
{
// ERROR: All tokens in input vector not part
// of a legal address list.
this.error_code = EXTRA_TOKENS_AT_START;
return;
}
}
public Vector getTokenList()
{
return(this.tokens);
}
public boolean isError()
{
if (this.error_code != NO_ERROR)
{
return(true);
}
return(false);
}
public String getErrorString()
{
String err;
switch(this.error_code)
{
case NO_ERROR:
err = new String("No error.");
break;
case LIST_OF_ADDRESSES:
err = new String("Cannot handle non-comma separated list of addresses!");
break;
case NO_DOMAIN:
err = new String("No valid domain.");
break;
case NO_VECTOR:
err = new String("No input vector.");
break;
case COMMENTS_IN_VECTOR:
err = new String("Comment tokens in input vector.");
break;
case EXTRA_TOKENS_AT_START:
err = new String("Extra tokens at the start of the input vector after parsing address list.");
break;
case NO_ADDR_SPEC:
err = new String("No valid address specification.");
break;
case NO_ROUTE_ADDR:
err = new String("No valid route address.");
break;
case BAD_ROUTE:
err = new String("Bad route format.");
break;
case BAD_ROUTE_DOMAIN:
err = new String("Bad domain in a route list.");
break;
case EXTRA_TOKENS_IN_ROUTE_ADDR:
err = new String("Extra tokens within the bounds of the route address.");
break;
case NO_PHRASE:
err = new String("No valid phrase.");
break;
case EXTRA_TOKENS_IN_MAILBOX_LIST:
err = new String("Extra tokens within the bounds of the mailbox list.");
break;
case NO_LOCAL_PART:
err = new String("No valid local part.");
break;
case BAD_GROUP:
err = new String("Bad group format.");
break;
case ILLEGAL_SPECIAL:
err = new String("Found a special character where another token type was expected.");
break;
default:
err = new String("");
}
return(err);
}
public Vector getAddressList()
{
return(this.address_list);
}
/***********************
***********************
** PRIVATE METHODS **
***********************
***********************/
/*
* create a new vector from the passed vector with all
* the comments stripped from it.
*/
private Vector stripComments(Vector tokens)
{
Vector new_vec = new Vector();
int num = tokens.size();
for (int i=0; i < num; i++)
{
RFC822Token token = (RFC822Token)tokens.elementAt(i);
if (token.getType() != RFC822Token.COMMENT)
{
new_vec.addElement(token);
}
}
return(new_vec);
}
/*
* Add an address (either group or mailbox) to the
* beginning of an address list vector.
*/
private void addMailAddress(AddressToken addr_token)
{
MailAddressToken address;
if (addr_token.getType() == AddressToken.GROUP)
{
address = new MailAddressToken((GroupToken)addr_token);
}
else
{
address = new MailAddressToken((MailboxToken)addr_token);
}
this.address_list.insertElementAt((Object)address, 0);
}
/*
* Collect an address. Starts from a rightmost
* token and works left.
* In all cases where we return null we set an error code
*/
private AddressToken collectAddress(int start, int end) throws
RFC822ParserException
{
MailboxToken mailbox = null;
AddressToken addr_token = null;
RFC822Token token;
int type;
token = (RFC822Token)this.tokens.elementAt(end);
type = token.getType();
if (type == RFC822Token.SPECIAL_CHAR)
{
/*
* A '>' must be the right edge of a
* route-addr which must be a mailbox
* made up of a phrase route-addr pair.
*/
if (token.isSpecialChar('>') == true)
{
mailbox = collectMailbox(end);
if (mailbox == null)
{
return(null);
}
return((AddressToken)mailbox);
}
/*
* A ';' must be the right edge of a
* group which must have a ':' separating
* a phrase from zero or more mailboxes
* in a comma separated list.
*/
else if (token.isSpecialChar(';') == true)
{
GroupToken group;
group = collectGroup(end);
if (group == null)
{
return(null);
}
return((AddressToken)group);
}
/*
* No other special character can terminate a
* legal address.
*/
else
{
this.error_code = ILLEGAL_SPECIAL;
return(null);
}
}
else if (type == RFC822Token.COMMENT)
{
// ERROR: No comments in address vector.
this.error_code = COMMENTS_IN_VECTOR;
return(null);
}
else
{
addr_token = collectAddrSpec(end);
// if returns null error code already set.
if (addr_token == null)
{
return(null);
}
mailbox = new MailboxToken(addr_token);
return((AddressToken)mailbox);
}
}
/*
* Collect a group having been passed the rightmost ';'.
* In all cases where we return null we set an error code.
*/
private GroupToken collectGroup(int indx2) throws RFC822ParserException
{
GroupToken group = null;
Vector mailboxes = new Vector();
AddressToken phrase;
RFC822Token token;
int colon_at;
int indx;
/*
* A ';' must be the right edge of a group which
* must have a ':' separating a phrase from zero or
* more mailboxes in a comma separated list.
*/
token = (RFC822Token)this.tokens.elementAt(indx2);
if (token.isSpecialChar(';') == false)
{
// ERROR: group must be terminated
// by a ';'.
this.error_code = BAD_GROUP;
return(null);
}
/*
* Stupid RFC allows extra commas here at the end
* of the list.
*/
indx = skipCommas(indx2 - 1);
token = (RFC822Token)this.tokens.elementAt(indx);
/*
* We have one or more mailboxes in a comma
* separated list.
*/
if (token.isSpecialChar(':') == false)
{
MailboxToken tmp_mbox;
RFC822Token comma;
tmp_mbox = collectMailbox(indx);
// if returns null error code already set.
if (tmp_mbox == null)
{
return(null);
}
mailboxes.insertElementAt((Object)tmp_mbox, 0);
indx = tmp_mbox.getTokenStart();
comma = (RFC822Token)this.tokens.elementAt(indx - 1);
while (comma.isSpecialChar(',') == true)
{
/*
* Skip the extra commas the RFC allows here
*/
indx = skipCommas(indx - 1);
/*
* List could end here with commas
*/
comma = (RFC822Token)this.tokens.elementAt(indx);
if (comma.isSpecialChar(':') == true)
{
indx++;
break;
}
tmp_mbox = collectMailbox(indx);
// if returns null error code already set.
if (tmp_mbox == null)
{
return(null);
}
mailboxes.insertElementAt((Object)tmp_mbox, 0);
indx = tmp_mbox.getTokenStart();
comma = (RFC822Token)this.tokens.elementAt(indx - 1);
}
if (comma.isSpecialChar(':') == false)
{
// ERROR: group contained more than
// a comma separated list of
// mailboxes with the
// ':' to ';' pair.
this.error_code = EXTRA_TOKENS_IN_MAILBOX_LIST;
return(null);
}
colon_at = indx - 1;
}
/*
* This is the case of an empty mailbox list
* in the group.
*/
else
{
colon_at = indx;
}
phrase = collectPhrase(colon_at);
// if returns null error code already set.
if (phrase == null)
{
return(null);
}
group = new GroupToken(phrase, mailboxes, indx2);
return(group);
}
/*
* Function to skip arbitrary number of commas.
* Needed because of the RFC's stupid comma separated list
* rules.
* Passed the index of possibly the first comma,
* returns the index of the first non-comma.
*/
private int skipCommas(int indx)
{
RFC822Token comma;
if (indx >= this.tokens.size())
{
return(indx);
}
comma = (RFC822Token)this.tokens.elementAt(indx);
while(comma.isSpecialChar(',') == true)
{
indx--;
if (indx < 0)
{
break;
}
comma = (RFC822Token)this.tokens.elementAt(indx);
}
return(indx);
}
/*
* Like skipCommas above but skipping forward
* instead of backwards.
*/
private int skipForwardCommas(int indx, int end)
{
RFC822Token comma;
comma = (RFC822Token)this.tokens.elementAt(indx);
while(comma.isSpecialChar(',') == true)
{
indx++;
if (indx > end)
{
break;
}
comma = (RFC822Token)this.tokens.elementAt(indx);
}
return(indx);
}
/*
* Passing in the final '>' token of a route-addr, work backwards
* through the tokens to the matching opening '<' token.
*/
private int findBeginRouteAddr(int end)
{
int indx = end - 1;
while (indx >= 0)
{
RFC822Token token;
token = (RFC822Token)this.tokens.elementAt(indx);
if (token.isSpecialChar('<') == true)
{
break;
}
indx--;
}
if (indx >= 0)
{
return(indx);
}
// ERROR: No matching '<' to '>' pair.
return(NO_ROUTE_ADDR);
}
/*
* Starting at the suspected terminal sub-domain token
* parse up to the beginning of the domain.
* All legal domains begin with an '@' symbol.
* To support the illegal but very common case of a local-part
* with no domain, need to return a special error for
* a missing domain.
*/
private int findBeginDomain(int end)
{
int indx = end;
boolean need_dot = false;
while (indx >= 0)
{
RFC822Token token;
token = (RFC822Token)this.tokens.elementAt(indx);
if (token.isSpecialChar('@') == true)
{
break;
}
/*
* Must be a '.' separated list of sub-domains.
*/
else if ((token.isSpecialChar('.') == true)&&
(need_dot == true))
{
need_dot = false;
}
/*
* Between '.' symbols, only atoms or domain-literals
* make legal sub-domains.
*/
else if (((token.getType() == RFC822Token.ATOM)||
(token.getType() == RFC822Token.DOMAIN_LITERAL))&&
(need_dot == false))
{
need_dot = true;
}
else
{
// ERROR: Encountered some token out of
// position to make a legal domain.
return(NO_DOMAIN);
}
indx--;
}
/*
* Must contain at least one sub-domain after the '@'
* symbol to be a legal domain.
*/
if ((indx >= 0)&&(indx < end))
{
return(indx);
}
// ERROR: No legal domain.
return(NO_DOMAIN);
}
/*
* A local-part looks a lot like a domain. The difference
* being it can have quoted-strings as well as atoms between the '.'
* symbols, while the domain can have domain-literals as well
* as atoms between the '.' symbols.
* So for a list of just '.' separated atoms, you depend
* on the '@' symbol to know which you are.
*
* It is illegal but common for the domain to be ommitted.
* We will treat this as legal.
*/
private int findBeginLocalPart(int end)
{
int indx = end;
boolean need_dot = false;
while (indx >= 0)
{
RFC822Token token;
token = (RFC822Token)this.tokens.elementAt(indx);
if ((token.isSpecialChar('.') == true)&&
(need_dot == true))
{
need_dot = false;
}
else if (((token.getType() == RFC822Token.QUOTED_STRING)||
(token.getType() == RFC822Token.ATOM))&&
(need_dot == false))
{
need_dot = true;
}
/*
* Break the local part on the first non-matching
* token you find.
*/
else
{
break;
}
indx--;
}
/*
* Since we broke on the terminating token, increment
* to get to the start of the local-part.
*/
indx++;
/*
* Must contain at least one word.
* Cannot start with a '.' symbol.
*/
if ((need_dot == true)&&(indx >= 0)&&(indx <= end))
{
return(indx);
}
// ERROR: Not a legal local-part.
return(NO_LOCAL_PART);
}
/*
* A legal phrase is always followed by a special symbol
* (either ':' or '<'). The passed index is assumed to be that
* symbol, and we search back for the start of the phrase.
*/
private int findBeginPhrase(int end)
{
int indx = end - 1;
while (indx >= 0)
{
RFC822Token token;
token = (RFC822Token)this.tokens.elementAt(indx);
if ((token.getType() == RFC822Token.QUOTED_STRING)||
(token.getType() == RFC822Token.ATOM))
{
// OK
}
else
{
break;
}
indx--;
}
/*
* Since we broke on the terminating token, increment
* to get to the start of the phrase.
*/
indx++;
/*
* A legal index must have at least one word in
* the phrase.
*/
if ((indx >= 0)&&(indx < end))
{
return(indx);
}
// ERROR: Illegal phrase.
return(NO_PHRASE);
}
/*
* Having been passed the rightmost token, collect the
* phrase and pass it back as an address token.
* On an error return null and set an error code.
*/
private AddressToken collectPhrase(int indx2)
{
int indx1 = findBeginPhrase(indx2);
AddressToken addr_token;
if (indx1 == NO_PHRASE)
{
this.error_code = NO_PHRASE;
return(null);
}
addr_token = new AddressToken(indx1, indx2 - 1, AddressToken.PHRASE);
return(addr_token);
}
/*
* Having been passed the rightmost token, collect the
* route-addr and pass it back as an address token.
* On an error return null and set an error code.
*/
private AddressToken collectRouteAddr(int indx2)
{
AddressToken addr_token;
RFC822Token token;
int indx;
int indx1 = findBeginRouteAddr(indx2);
if ((indx1 == NO_ROUTE_ADDR)||(indx1 >= (indx2 - 1)))
{
// ERROR: Illegal to have a route-addr with
// an empty body.
this.error_code = NO_ROUTE_ADDR;
return(null);
}
addr_token = collectAddrSpec(indx2 - 1);
// if returns null error code already set.
if (addr_token == null)
{
return(null);
}
indx = addr_token.getTokenStart();
/*
* The route is optional, look for the closing ':'
* symbol to see if we have one.
*/
token = (RFC822Token)this.tokens.elementAt(indx - 1);
if (token.isSpecialChar(':') == true)
{
AddressToken route = collectRoute(indx - 1);
// if returns null error code already set.
if (route == null)
{
return(null);
}
indx = route.getTokenStart();
}
token = (RFC822Token)this.tokens.elementAt(indx - 1);
if (token.isSpecialChar('<') == false)
{
// ERROR: More in the route-addr than a legal
// addr-spec and an optional route.
this.error_code = EXTRA_TOKENS_IN_ROUTE_ADDR;
return(null);
}
addr_token = new AddressToken(indx - 1, indx2, AddressToken.ROUTE_ADDR);
return(addr_token);
}
/*
* Having been passed the rightmost token, collect the
* addr-spec and pass it back as an address token.
* On an error return null and set an error code.
*/
private AddressToken collectAddrSpec(int indx2)
{
int at_sign = findBeginDomain(indx2);
int indx1;
AddressToken addr_token;
if (at_sign == NO_DOMAIN)
{
indx1 = findBeginLocalPart(indx2);
}
else
{
indx1 = findBeginLocalPart(at_sign - 1);
}
if (indx1 == NO_LOCAL_PART)
{
this.error_code = NO_ADDR_SPEC;
return(null);
}
addr_token = new AddressToken(indx1, indx2, AddressToken.ADDR_SPEC);
return(addr_token);
}
/*
* Having been passed the rightmost token, collect the
* route and pass it back as an address token.
* On an error return null and set an error code.
*/
private AddressToken collectRoute(int indx2)
{
AddressToken addr_token;
RFC822Token token;
int at_sign;
int indx;
token = (RFC822Token)this.tokens.elementAt(indx2);
if (token.isSpecialChar(':') == false)
{
// ERROR: Not a legal route, not ':' terminated.
this.error_code = BAD_ROUTE;
return(null);
}
/*
* Stupid RFC allows many commas at end of list.
*/
indx = skipCommas(indx2 - 1);
/*
* Must be at least one domain, look for the '@' symbol
* that starts it.
*/
at_sign = findBeginDomain(indx);
if (at_sign < 1)
{
// ERROR: Must always be room to move back from
// the start of a route because it is
// always enclosed in a route-addr.
// So it is always ok to look for the ','
// In the optional comma separated list.
this.error_code = BAD_ROUTE_DOMAIN;
return(null);
}
/*
* If there is a comma, there are one or more domains to
* skip over.
*/
token = (RFC822Token)this.tokens.elementAt(at_sign - 1);
while (token.isSpecialChar(',') == true)
{
/*
* Again RFC allows many commas
*/
indx = skipCommas(at_sign - 1);
/*
* These commas may have been the end of the list
*/
token = (RFC822Token)this.tokens.elementAt(indx);
if (token.isSpecialChar('<') == true)
{
at_sign = indx + 1;
break;
}
at_sign = findBeginDomain(indx);
if (at_sign < 1)
{
// ERROR: Must always be room to move back from
// the start of a route because it is
// always enclosed in a route-addr.
// So it is always ok to look for the','
// In the optional comma separated list.
this.error_code = BAD_ROUTE_DOMAIN;
return(null);
}
token = (RFC822Token)this.tokens.elementAt(at_sign - 1);
}
addr_token = new AddressToken(at_sign, indx2, AddressToken.ROUTE);
return(addr_token);
}
/*
* Having been passed the rightmost token, collect the
* mailbox and pass it back as a mailbox token.
* On an error return null and set an error code.
*/
private MailboxToken collectMailbox(int indx2) throws RFC822ParserException
{
MailboxToken mailbox = null;
AddressToken addr_token = null;
AddressToken phrase;
RFC822Token token;
int indx;
/*
* A legal mailbox either ends in a '>'
* symbol and is a phrase route-addr pair,
* or it is a simple addr-spec.
*/
token = (RFC822Token)this.tokens.elementAt(indx2);
/*
* Get the phrase route-addr pair.
*/
if (token.isSpecialChar('>') == true)
{
addr_token = collectRouteAddr(indx2);
// if returns null error code already set.
if (addr_token == null)
{
return(null);
}
indx = addr_token.getTokenStart();
phrase = collectPhrase(indx);
if (this.error_code == NO_PHRASE)
{
throw new RouteAddrNoPhraseException("No valid phrase with this route address.", addr_token, this.tokens);
}
// if returns null error code already set.
if (phrase == null)
{
return(null);
}
indx = phrase.getTokenStart();
mailbox = new MailboxToken(phrase, addr_token);
}
/*
* Get the addr-spec.
*/
else
{
addr_token = collectAddrSpec(indx2);
// if returns null error code already set.
if (addr_token == null)
{
return(null);
}
indx = addr_token.getTokenStart();
mailbox = new MailboxToken(addr_token);
}
return(mailbox);
}
}