Fixes for emoticon recognition - Bug #21203 - r: rhp (externally contributed)

This commit is contained in:
rhp%netscape.com 2000-01-11 01:56:25 +00:00
parent 993cdfefc9
commit f428189b73
3 changed files with 179 additions and 181 deletions

View File

@ -1,4 +1,4 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*-
*
* The "License" shall be the Mozilla Public License Version 1.1, except
* Sections 6.2 and 11, but with the addition of the below defined Section 14.
@ -37,6 +37,18 @@
#include "nsIIOService.h"
#include "nsIServiceManager.h"
/**
 * Returns the tail of |text| that begins at offset |start|.
 *
 * @param text  (in): the source string
 * @param start (in): offset into text at which the returned substring begins
 * @return a new (local) string holding the characters from start up to the
 *         end of text; empty when start is at or beyond the end of text
 */
static nsAutoString
Right(const nsAutoString& text, PRUint32 start)
{
  MOZ_TIMER_START(mRightTimer);
  nsAutoString result;
  // Guard the subtraction: with start > Length() the unsigned difference
  // would wrap around and hand Right() a bogus character count.
  if (start < PRUint32(text.Length()))
    text.Right(result, text.Length() - start);
  MOZ_TIMER_STOP(mRightTimer);
  return result;
}
nsAutoString
mozTXTToHTMLConv::EscapeChar(const PRUnichar ch)
{
@ -186,16 +198,21 @@ mozTXTToHTMLConv::FindURLStart(const nsAutoString& text, const PRUint32 pos,
PRInt32 i = pos + 1;
for (; i >= 0
&& text[PRUint32(i)] != '>' && text[PRUint32(i)] != '<'
&& text[PRUint32(i)] != '"' && text[PRUint32(i)] != '\\'
&& text[PRUint32(i)] != '`' && text[PRUint32(i)] != '}'
&& text[PRUint32(i)] != ']' && text[PRUint32(i)] != ')'
&& text[PRUint32(i)] != '|'
&& text[PRUint32(i)] != '"' && text[PRUint32(i)] != '\''
&& text[PRUint32(i)] != '`' && text[PRUint32(i)] != ','
&& text[PRUint32(i)] != '{' && text[PRUint32(i)] != '['
&& text[PRUint32(i)] != '(' && text[PRUint32(i)] != '|'
&& text[PRUint32(i)] != '\\'
&& !nsString::IsSpace(text[PRUint32(i)])
; i--)
;
if (PRUint32(++i) != pos)
if
(
nsString::IsAlpha(text[PRUint32(++i)]) ||
nsString::IsDigit(text[PRUint32(i)])
)
{
start = i;
start = PRUint32(i);
return PR_TRUE;
}
else
@ -238,9 +255,9 @@ mozTXTToHTMLConv::FindURLEnd(const nsAutoString& text, const PRUint32 pos,
for (; PRInt32(i) < text.Length()
&& text[i] != '>' && text[i] != '<'
&& text[i] != '"' && text[i] != '\''
&& text[i] != '`' && text[i] != '}'
&& text[i] != ']' && text[i] != ')'
&& text[i] != '|'
&& text[i] != '`' && text[i] != ','
&& text[i] != '}' && text[i] != ']'
&& text[i] != ')' && text[i] != '|'
&& !nsString::IsSpace(text[i])
; i++)
;
@ -303,7 +320,7 @@ mozTXTToHTMLConv::CalculateURLBoundaries(const nsAutoString& text,
nsAutoString temp;
text.Mid(temp, descstart, pos - descstart);
replaceBefore = ScanTXT(temp, ~kURLs /*prevents loop*/
& whathasbeendone).Length();
& whathasbeendone).Length();
return;
}
@ -425,18 +442,6 @@ mozTXTToHTMLConv::FindURL(const nsAutoString& text, const PRUint32 pos,
return state[check] == success;
}
/**
 * Returns the tail of |text| that begins at offset |start|.
 *
 * @param text  (in): the source string
 * @param start (in): offset into text at which the returned substring begins
 * @return a new (local) string holding the characters from start up to the
 *         end of text; empty when start is at or beyond the end of text
 */
nsAutoString
mozTXTToHTMLConv::Right(const nsAutoString& text, PRUint32 start)
{
  MOZ_TIMER_START(mRightTimer);
  nsAutoString result;
  // Only copy when start lies inside the string; otherwise the unsigned
  // expression Length() - start would wrap around to a huge count.
  if (start < PRUint32(text.Length()))
    text.Right(result, text.Length() - start);
  MOZ_TIMER_STOP(mRightTimer);
  return result;
}
PRBool
mozTXTToHTMLConv::ItMatchesDelimited(const nsAutoString& text,
const char* rep, LIMTYPE before, LIMTYPE after)
@ -481,9 +486,7 @@ mozTXTToHTMLConv::ItMatchesDelimited(const nsAutoString& text,
text[afterPos] == *rep
) ||
!(before == LT_IGNORE ? text : Right(text, 1)).Equals(rep,
PR_TRUE, repLen) // XXX bug #21071
/* !Equals((before == LT_IGNORE ? text : Right(text, 1)), rep,
PR_TRUE, rep.Length())*/
PR_TRUE, repLen)
)
return PR_FALSE;
@ -606,75 +609,80 @@ mozTXTToHTMLConv::GlyphHit(const nsAutoString& text, PRBool col0,
if
(
((col0 ? text.First() : text[1]) == ':' || // Performance increase
(col0 ? text.First() : text[1]) == ';' )
( // Performance increase
(col0 ? text.First() : text[1]) == ':' ||
(col0 ? text.First() : text[1]) == ';'
)
&&
(
SmilyHit(text, col0, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
SmilyHit(text, col0, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":-(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" alt=\";-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, col0, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" alt=\";-P\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
)
)
{
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
else if // XXX Hotfix
if // XXX Hotfix
(
col0 // Performance increase
&&
(
!col0 // Performance increase
&&
(
text[1] == ':' ||
text[1] == ';'
)
&&
(
SmilyHit(text, PR_FALSE, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
)
text[1] == ':' ||
text[1] == ';'
)
&&
(
SmilyHit(text, PR_FALSE, ":-)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":)", "<img SRC=\"chrome://messenger/skin/smile.gif\" alt=\":)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":-(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":-(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ":(", "<img SRC=\"chrome://messenger/skin/frown.gif\" alt=\":(\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
// alt text must repeat the smiley being replaced (was ";-P", copied from the sick.gif entry)
SmilyHit(text, PR_FALSE, ";-)", "<img SRC=\"chrome://messenger/skin/wink.gif\" alt=\";-)\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen) ||
SmilyHit(text, PR_FALSE, ";-P", "<img SRC=\"chrome://messenger/skin/sick.gif\" alt=\";-P\" height=17 width=17 align=ABSCENTER>", outputHTML, glyphTextLen)
)
)
{
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
else if (ItMatchesDelimited(text, "(c)", LT_IGNORE, LT_DELIMITER))
// Note: ItMatchesDelimited compares case-insensitive
if (text.First() == '(')
{
outputHTML = "&copy;";
glyphTextLen = 3;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
if (ItMatchesDelimited(text, "(c)", LT_IGNORE, LT_DELIMITER))
// Note: ItMatchesDelimited compares case-insensitive
{
outputHTML = "&copy;";
glyphTextLen = 3;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
if (ItMatchesDelimited(text, "(r)", LT_IGNORE, LT_DELIMITER))
// see above
{
outputHTML = "&reg;";
glyphTextLen = 3;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
}
else if (ItMatchesDelimited(text, "(r)", LT_IGNORE, LT_DELIMITER))
// see above
{
outputHTML = "&reg;";
glyphTextLen = 3;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
else if (ItMatchesDelimited(text, " +/-", LT_IGNORE, LT_IGNORE))
if (ItMatchesDelimited(text, " +/-", LT_IGNORE, LT_IGNORE))
{
outputHTML = " &plusmn;";
glyphTextLen = 4;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
else if (col0 && ItMatchesDelimited(text, "+/-", LT_IGNORE, LT_IGNORE))
if (col0 && ItMatchesDelimited(text, "+/-", LT_IGNORE, LT_IGNORE))
{
outputHTML = "&plusmn;";
glyphTextLen = 3;
MOZ_TIMER_STOP(mGlyphHitTimer);
return PR_TRUE;
}
else if // x^2 -> sup
if // x^2 -> sup
(
text[1] == '^' // Performance increase
&&
@ -849,24 +857,36 @@ printf(text.ToNewCString());
switch (text[i]) // Performance increase
{
case '*':
case '_':
case '/':
case '|':
if
(
StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
"*", "strong", "class=txt_star",
HTMLnsStr, structPhrase_strong) ||
StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
HTMLnsStr, structPhrase_strong))
{
result += HTMLnsStr;
i++;
continue;
}
case '_':
if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
"_", "em" /* <u> is deprecated */, "class=txt_underscore",
HTMLnsStr, structPhrase_underline) ||
StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
HTMLnsStr, structPhrase_underline))
{
result += HTMLnsStr;
i++;
continue;
}
case '/':
if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
"/", "em", "class=txt_slash",
HTMLnsStr, structPhrase_italic) ||
StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
HTMLnsStr, structPhrase_italic))
{
result += HTMLnsStr;
i++;
continue;
}
case '|':
if (StructPhraseHit(i == 0 ? text : Right(text, i - 1), i == 0,
"|", "code", "class=txt_verticalline",
HTMLnsStr, structPhrase_code)
)
HTMLnsStr, structPhrase_code))
{
result += HTMLnsStr;
i++;
@ -1047,7 +1067,7 @@ mozTXTToHTMLConv::ScanTXT(const PRUnichar *text, PRUint32 whattodo,
if (!_retval || !text)
return NS_ERROR_NULL_POINTER;
*_retval = ScanTXT(text, whattodo).ToNewUnicode();
return _retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}
NS_IMETHODIMP
@ -1057,7 +1077,7 @@ mozTXTToHTMLConv::ScanHTML(const PRUnichar *text, PRUint32 whattodo,
if (!_retval || !text)
return NS_ERROR_NULL_POINTER;
*_retval = ScanHTML(text, whattodo).ToNewUnicode();
return _retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
return *_retval ? NS_OK : NS_ERROR_OUT_OF_MEMORY;
}

View File

@ -49,7 +49,10 @@ static NS_DEFINE_CID(kTXTToHTMLConvCID, MOZITXTTOHTMLCONV_CID);
class mozTXTToHTMLConv : public mozITXTToHTMLConv
{
//////////////////////////////////////////////////////////
public:
//////////////////////////////////////////////////////////
mozTXTToHTMLConv();
virtual ~mozTXTToHTMLConv();
@ -84,16 +87,29 @@ public:
MOZ_TIMER_DECLARE(mRightTimer)
MOZ_TIMER_DECLARE(mTotalMimeTime)
///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
protected:
///////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////
/**
@param text (in): the source string
@param start (in): offset of text specifying the start of the new object
@return a new (local) object containing the substring
*/
nsAutoString Right(const nsAutoString& text, PRUint32 start);
Completes<ul>
<li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
<li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
<li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
</ul>
It does no check, if the resulting URL is valid.
@param text (in): abbreviated URL
@param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
@return Completed URL at success and empty string at failure
*/
nsAutoString CompleteAbbreviatedURL(const nsAutoString& text,
const PRUint32 pos);
//////////////////////////////////////////////////////////
private:
//////////////////////////////////////////////////////////
enum LIMTYPE
{
@ -145,20 +161,6 @@ protected:
*/
nsAutoString UnescapeStr(const nsAutoString& aString);
/**
Completes<ul>
<li>Case 1: "mozilla@bucksch.org" to "mailto:mozilla@bucksch.org"
<li>Case 2: "www.mozilla.org" to "http://www.mozilla.org"
<li>Case 3: "ftp.mozilla.org" to "ftp://ftp.mozilla.org"
</ul>
It does no check, if the resulting URL is valid.
@param text (in): abbreviated URL
@param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
@return Completed URL at success and empty string at failure
*/
nsAutoString CompleteAbbreviatedURL(const nsAutoString& text,
const PRUint32 pos);
/**
<em>Note</em>: I use different strategies to pass context between the
functions (full text and pos vs. cut text and col0, glyphTextLen vs.
@ -185,60 +187,6 @@ protected:
const PRUint32 whathasbeendone, nsAutoString& outputHTML,
PRInt32& replaceBefore, PRInt32& replaceAfter);
/**
@param text (in): line of text possibly with tagTXT.<p>
if col0 is true,
starting with tagTXT<br>
else
starting one char before tagTXT
@param col0 (in): tagTXT is on the beginning of the line (or paragraph).
open must be 0 then.
@param tagTXT (in): Tag in plaintext to search for, e.g. "*"
@param tagHTML (in): HTML-Tag to replace tagTXT with,
without "<" and ">", e.g. "strong"
@param attributeHTML (in): HTML-attribute to add to opening tagHTML,
e.g. "class=txt_star"
@param outputHTML (out): string to insert in output stream
@param openTags (in/out): Number of currently open tags of type tagHTML
@return Conversion succeeded
*/
PRBool StructPhraseHit(const nsAutoString& text, PRBool col0,
const char* tagTXT,
const char* tagHTML, const char* attributeHTML,
nsAutoString& outputHTML, PRUint32& openTags);
/**
@param text (in), col0 (in): see GlyphHit
@param tagTXT (in): Smily, see also StructPhraseHit
@param tagHTML (in): see StructPhraseHit
@param outputHTML (out), glyphTextLen (out): see GlyphHit
*/
PRBool SmilyHit(const nsAutoString& text, PRBool col0,
const char* tagTXT, const char* tagHTML,
nsAutoString& outputHTML, PRInt32& glyphTextLen);
/**
Checks, if we can replace some chars at the start of line with prettier HTML
code.<p>
If success is reported, replace the first glyphTextLen chars with outputHTML
@param text (in): line of text possibly with Glyph.<p>
If col0 is true,
starting with Glyph <br><!-- (br not part of text) -->
else
starting one char before Glyph
@param col0 (in): text starts at the beginning of the line (or paragraph)
@param outputHTML (out): see StructPhraseHit
@param glyphTextLen (out): Length of original text to replace
@return see StructPhraseHit
*/
PRBool GlyphHit(const nsAutoString& text, PRBool col0,
nsAutoString& outputHTML, PRInt32& glyphTextLen);
//////////////////////////////////////////////////////////
private:
//////////////////////////////////////////////////////////
enum modetype {
unknown,
RFC1738, /* Check, if RFC1738, APPENDIX compliant,
@ -249,7 +197,7 @@ private:
Also allow email addresses without scheme,
e.g. "<mozilla@bucksch.org>" */
freetext, /* assume heading scheme
with "[a-zA-Z][a-zA-Z0-9+\-.]*:" like "news:"
with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
(see RFC2396, Section 3.1).
Certain characters (see code) or any whitespace
(including linebreaks) end the URL.
@ -306,6 +254,57 @@ private:
PRBool CheckURLAndCreateHTML(
const nsAutoString& txtURL, const nsAutoString& desc,
nsAutoString& outputHTML);
/**
@param text (in): line of text possibly with tagTXT.<p>
if col0 is true,
starting with tagTXT<br>
else
starting one char before tagTXT
@param col0 (in): tagTXT is on the beginning of the line (or paragraph).
open must be 0 then.
@param tagTXT (in): Tag in plaintext to search for, e.g. "*"
@param tagHTML (in): HTML-Tag to replace tagTXT with,
without "<" and ">", e.g. "strong"
@param attributeHTML (in): HTML-attribute to add to opening tagHTML,
e.g. "class=txt_star"
@param outputHTML (out): string to insert in output stream
@param openTags (in/out): Number of currently open tags of type tagHTML
@return Conversion succeeded
*/
PRBool StructPhraseHit(const nsAutoString& text, PRBool col0,
const char* tagTXT,
const char* tagHTML, const char* attributeHTML,
nsAutoString& outputHTML, PRUint32& openTags);
/**
@param text (in), col0 (in): see GlyphHit
@param tagTXT (in): Smily, see also StructPhraseHit
@param tagHTML (in): see StructPhraseHit
@param outputHTML (out), glyphTextLen (out): see GlyphHit
*/
PRBool SmilyHit(const nsAutoString& text, PRBool col0,
const char* tagTXT, const char* tagHTML,
nsAutoString& outputHTML, PRInt32& glyphTextLen);
/**
Checks, if we can replace some chars at the start of line with prettier HTML
code.<p>
If success is reported, replace the first glyphTextLen chars with outputHTML
@param text (in): line of text possibly with Glyph.<p>
If col0 is true,
starting with Glyph <br><!-- (br not part of text) -->
else
starting one char before Glyph
@param col0 (in): text starts at the beginning of the line (or paragraph)
@param outputHTML (out): see StructPhraseHit
@param glyphTextLen (out): Length of original text to replace
@return see StructPhraseHit
*/
PRBool GlyphHit(const nsAutoString& text, PRBool col0,
nsAutoString& outputHTML, PRInt32& glyphTextLen);
};
// It's said, that Win32 and Mac don't like static const members

View File

@ -38,27 +38,6 @@
<p>
Wrapper class for various parsing routines, that convert plain text to HTML.
They try to recognize cites, URLs, plain text formatting like *bold* etc.
<p>
Use the nsString versions of these functions in the mozTXTToHTMLConv
implementation when calling from C++.
<p>
There're a lot of protected virtual functions in the mozTXTToHTMLConv
implementation, that might be helpful, too. Add wrappers for them to this or
any other interface, if you need them. It's easier to add a function than to
remove it, so I don't add them now. Here's the list:
nsAutoString EscapeChar(const PRUnichar ch)
nsAutoString EscapeStr(const nsAutoString& aString)
nsAutoString UnescapeStr(const nsAutoString& aString)
nsAutoString CompleteAbbreviatedURL(const nsAutoString& text, PRUint32 pos)
PRBool FindURL(const nsAutoString& text, PRUint32 pos,
PRUint32 whathasbeendone, nsAutoString& outputHTML,
PRInt32& replaceBefore, PRInt32& replaceAfter)
nsAutoString Right(const nsAutoString& text, PRUint32 start)
PRBool ItMatchesDelimited(const nsAutoString& text, const nsAutoString& rep,
LIMTYPE before, LIMTYPE after)
PRUint32 NumberOfMatches(const nsAutoString& text, const nsAutoString& rep,
LIMTYPE before, LIMTYPE after)
*/
#include "nsIStreamConverter.idl"
@ -89,7 +68,7 @@ interface mozITXTToHTMLConv : nsIStreamConverter {
/*
Adds additional formatting to user edited text, that the user was too lazy
"unknowledged" (DELETEME: is that a word?) to make.
or not knowledgeable enough to make.
<p>
<em>Note:</em> Don't use kGlyphSubstitution with this function. This option
generates tags that are unusable for UAs other than Mozilla. This would