mirror of
https://github.com/mozilla/gecko-dev.git
synced 2024-10-31 14:15:30 +00:00
419 lines
18 KiB
C++
419 lines
18 KiB
C++
/*
|
|
* source/format/spellfmt.h, international, international, 971113b 97/10/30
|
|
*
|
|
* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
|
|
* (C) Copyright IBM Corp. 1996 - All Rights Reserved
|
|
*
|
|
* Portions copyright (c) 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
|
|
*
|
|
* The original version of this source code and documentation is copyrighted
|
|
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
|
|
* materials are provided under terms of a License Agreement between Taligent
|
|
* and Sun. This technology is protected by multiple US and International
|
|
* patents. This notice and attribution to Taligent may not be removed.
|
|
* Taligent is a registered trademark of Taligent, Inc.
|
|
*
|
|
* Permission to use, copy, modify, and distribute this software
|
|
* and its documentation for NON-COMMERCIAL purposes and without
|
|
* fee is hereby granted provided that this copyright notice
|
|
* appears in all copies. Please refer to the file "copyright.html"
|
|
* for further important copyright and licensing information.
|
|
*
|
|
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
|
|
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
|
|
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
|
|
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
|
|
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
|
|
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
|
|
*
|
|
*/
|
|
|
|
#ifndef _SPELLFMT
|
|
#define _SPELLFMT
|
|
|
|
#include "ptypes.h"
|
|
#include "numfmt.h"
|
|
|
|
// Forward declarations: this header refers to these types only through
// pointers and references, so their complete definitions are not required here.
struct NumberSpelloutRule;
|
|
class SpelloutRuleVector;
|
|
|
|
/**
|
|
* A class that spells out a numeric value in words (e.g., 123.45 becomes "one hundred
|
|
* twenty-three point four five"). You tell the NumberSpelloutFormat how to spell out
|
|
* numbers by passing its constructor a rule description string that it uses to build
|
|
* a rule list, which is in turn used to format and parse numbers.
|
|
* <P>
|
|
* The rule description language works as follows. Number spellout is specified through
|
|
* the use of an ordered list of rules, each of which has:
|
|
* <ul>
|
|
* <li>A <i>base value</i> that controls which numbers the rule is used for (a rule
|
|
* applies to the range from its base value to the next rule's base value minus one).
|
|
* <li>A <i>power of 10</i> that controls how the substitutions behave. Normally this
|
|
* is the base value's common log, but it can be lower.
|
|
* <li><i>Rule text,</i> which forms the basis of the format() function's return value.
|
|
* <li>An optional <i>major substitution,</i> which specifies the position where text is
|
|
* to be inserted into the result string. The inserted text is the string you get from
|
|
* using this rule set to format the value being formatted / the rule's power of 10.
|
|
* For example, if you use "100: << hundred >>;" to format 234, the major substitution
|
|
* value is 2, and "two" gets inserted where the << is.
|
|
* <li>An optional <i>minor substitution,</i> which also specifies a position where text
|
|
* is to be inserted into the result string. The inserted text is the string you get
|
|
* from using this rule set to format the value being formatted % the rule's power of 10.
|
|
* For example, if you use "100: << hundred >>;" to format 234, the minor substitution
|
|
* value is 34, and "thirty-four" gets inserted where the >> is.
|
|
* </ul>
|
|
* In the description string, rules are separated by semicolons, and leading whitespace is
|
|
* ignored. The rule's base value may precede its text and is separated from it by a
|
|
* colon (you can include commas or periods for readability, but they're ignored). A
|
|
* > between the base value and the colon reduces the rule's power of 10 by one.
|
|
* <P>
|
|
* Within the rule text, << marks the position of the major substitution, and >> marks
|
|
* the position of the minor substitution. The rule text may include optional text in
|
|
* brackets. This text is only included when the minor substitution value is not zero
|
|
* (the minor substitution itself is usually included in the brackets).
|
|
* <P>
|
|
* The rule description may also include a <i>negative number rule,</i> which specifies
|
|
* how to format negative numbers. The negative number rule begins with "-:" instead of
|
|
* a base value, and the minor substitution tells where to put the result of formatting
|
|
* the number's absolute value.
|
|
* <P>
|
|
* The rule description may also include a <i>decimal rule,</i> which specifies how
|
|
* to format numbers that have fractional parts. The decimal rule begins with ".:"
|
|
* instead of a base value, the major substitution is replaced with the number's
|
|
* integral part, and the minor substitution is replaced by the number's fractional
|
|
* part, spelled out digit-by-digit.
|
|
* <P>
|
|
* The bracket notation actually expands into two rules in the rule list : one that doesn't
|
|
* include the stuff in the brackets, and one with one-higher base value that does. In
|
|
* other words,
|
|
* <pre>
|
|
* . 20:twenty[->>];
|
|
* . turns into
|
|
* . 20:twenty;
|
|
* . 21:twenty->>;
|
|
* . and
|
|
* . 100:<<hundred[>>];
|
|
* . turns into
|
|
* . 100:<<hundred;
|
|
* . 101:<<hundred>>;
|
|
* . To get an idea of how this works, the rules for U.S. English are as follows:
|
|
* . zero;one;two;three;four;five;six;seven;eight;nine;
|
|
* . ten;eleven;twelve;thirteen;fourteen;fifteen;sixteen;seventeen;eighteen;nineteen;
|
|
* . twenty[->>];
|
|
* . 30:thirty[->>];
|
|
* . 40:forty[->>];
|
|
* . 50:fifty[->>];
|
|
* . 60:sixty[->>];
|
|
* . 70:seventy[->>];
|
|
* . 80:eighty[->>];
|
|
* . 90:ninety[->>];
|
|
* . 100:<<hundred[>>];
|
|
* . 1000:<<thousand[>>];
|
|
* . 1000000:<<million[>>];
|
|
* . 1000000000:<<billion[>>];
|
|
* . 1000000000000:<<trillion[>>];
|
|
* . 1000000000000000:OUT OF RANGE!
|
|
* </pre>
|
|
* @see NumberFormat
|
|
* @version 1.22 9/17/97
|
|
* @author Richard Gillam
|
|
*/
|
|
class T_FORMAT_API NumberSpelloutFormat : public NumberFormat {
public:
    /**
     * Constructs a NumberSpelloutFormat that formats and parses numbers according
     * to the default rule set (U.S. English).
     */
    NumberSpelloutFormat();

    /**
     * Constructs a NumberSpelloutFormat that formats and parses numbers according to
     * the rules specified in "description".
     * @param description A string containing a textual description of the rules to use
     *                    to format numbers.  For information on the format of this
     *                    string, see the class description.
     * @param err         the error code.
     */
    NumberSpelloutFormat(const UnicodeString& description,
                         ErrorCode& err);

    /**
     * Copy constructor.
     * @param that the copy origin.
     */
    NumberSpelloutFormat(const NumberSpelloutFormat& that);

    /**
     * Destructor.
     */
    ~NumberSpelloutFormat();

    /**
     * Overrides operator==, checks if obj is the same object as this.
     * @param obj the object to be compared with.
     * @return TRUE if the obj is the same as this, FALSE otherwise.
     */
    virtual t_bool operator==(const Format& obj) const;

    /**
     * Overrides Cloneable, creates an instance that is identical to this.
     * @return the created instance.
     */
    virtual Format* clone() const;

    /**
     * Formats a double number using this NumberSpelloutFormat instance and
     * copies the result to the output buffer.
     * @param number the double number to be formatted.
     * @param output the result buffer.
     * @return the result buffer.
     */
    UnicodeString& format(double number,
                          UnicodeString& output) const;

    /**
     * Formats a long number using this NumberSpelloutFormat instance and
     * copies the result to the output buffer.
     * @param number the long number to be formatted.
     * @param output the result buffer.
     * @return the result buffer.
     */
    UnicodeString& format(long number,
                          UnicodeString& output) const;

    /**
     * Appends a string representing "number" spelled out in words (according to this
     * format's rule list) to the end of toAppendTo.
     * @param number     The number to format.
     * @param toAppendTo The string to append the result to.
     * @param pos        Ignored on input.  Set to point to the whole range covered by
     *                   the formatted number on output.
     * @return toAppendTo
     */
    virtual UnicodeString& format(double number,
                                  UnicodeString& toAppendTo,
                                  FieldPosition& pos) const;

    /**
     * Appends a string representing "number" spelled out in words (according to this
     * format's rule list) to the end of toAppendTo.
     * @param number     The number to format.
     * @param toAppendTo The string to append the result to.
     * @param pos        Ignored on input.  Set to point to the whole range covered by
     *                   the formatted number on output.
     * @return toAppendTo
     */
    virtual UnicodeString& format(long number,
                                  UnicodeString& toAppendTo,
                                  FieldPosition& pos) const;

    /*Added in order not to hide the superclass implementation [Bertrand A. D. 01/20/98]*/
    virtual UnicodeString& format(const Formattable&,UnicodeString&,FieldPosition&,ErrorCode&) const;
    /*end of update [Bertrand A. D. 01/20/98]*/

    /**
     * Parses "text" and stores the value it represents in "result".
     * @param text   The string to parse.
     * @param result Receives the value represented by the string.  If possible, this
     *               will be a long value; otherwise, it will be a double value.
     * @param status the error code status.
     */
    virtual void parse(const UnicodeString& text,
                       Formattable& result,
                       ErrorCode& status) const;

    /**
     * Parses "text" and stores the value it represents in "result".
     * @param text          The string to parse.
     * @param result        Receives the value represented by the string.  If possible,
     *                      this will be a long value; otherwise, it will be a double
     *                      value.
     * @param parsePosition On entry, specifies the position in the string to begin
     *                      parsing at.  The formatted number is expected to run from
     *                      this position to the end of the string.  On exit, if the
     *                      parse succeeded, this will point to the string's
     *                      past-the-end position.  If the parse failed, it will have
     *                      been left unchanged.
     */
    virtual void parse(const UnicodeString& text,
                       Formattable& result,
                       ParsePosition& parsePosition) const;

    /**
     * Return the class ID for this class.  This is useful only for
     * comparing to a return value from getDynamicClassID().  For example:
     * <pre>
     * .   Base* polymorphic_pointer = createPolymorphicObject();
     * .   if (polymorphic_pointer->getDynamicClassID() ==
     * .       Derived::getStaticClassID()) ...
     * </pre>
     * @return The class ID for all objects of this class.
     */
    static ClassID getStaticClassID() { return (ClassID)&fgClassID; }

    /**
     * Returns a unique class ID POLYMORPHICALLY.  Pure virtual override.
     * This method is to implement a simple version of RTTI, since not all
     * C++ compilers support genuine RTTI.  Polymorphic operator==() and
     * clone() methods call this method.
     *
     * @return The class ID for this object.  All objects of a
     *         given class have the same class ID.  Objects of
     *         other classes have different class IDs.
     */
    virtual ClassID getDynamicClassID() const { return getStaticClassID(); }

private:
    // Declared private: assignment is not part of this class's public interface.
    NumberSpelloutFormat& operator=(const NumberSpelloutFormat&);

    //----------------------------------------------------------------------------
    // implementation of formatting algorithm
    //----------------------------------------------------------------------------

    /**
     * The primary body of the formatting algorithm.
     * @param x      The value to format.
     * @param result The string into which to insert the result.
     * @param pos    The position in "result" where the result should be inserted.
     */
    void doFormat(double x,
                  UnicodeString& result,
                  TextOffset pos) const;

    /**
     * The body of the formatAsDigits() algorithm.
     * @param x      The value to format.
     * @param result The string into which to insert the result.
     * @param pos    The position in "result" where the result is to be inserted.
     */
    void doFormatAsDigits(double x,
                          UnicodeString& result,
                          TextOffset pos) const;

    /**
     * Returns the number of digits after the decimal point in a double number x.
     * @param x the double number
     */
    static int digitsAfterDecimal(double x);

    //----------------------------------------------------------------------------
    // implementation of parsing algorithm
    //----------------------------------------------------------------------------

    /**
     * The main body of the parse algorithm.
     * @param s          The string to parse.
     * @param startAt    The position of the first character to consider.  Parsing
     *                   proceeds from startAt to the <i>beginning</i> of the string.
     * @param pos        On exit, this is filled in with the position of the first
     *                   character in s that was not matched by this call.
     * @param endWithSub If true, match only rules that end with a substitution.  If
     *                   false, match only rules that <i>don't</i> end with a
     *                   substitution.
     * @param loBoundP10 Match only rules with a power of 10 greater than or equal to
     *                   this value.
     * @param hiBoundP10 Match only rules with a power of 10 less than or equal to
     *                   this value.
     * @return -1 as the error value, otherwise, the parsed value.
     */
    double doParse(const UnicodeString& s,
                   TextOffset startAt,
                   ParsePosition& pos,
                   t_bool endWithSub,
                   t_int16 loBoundP10,
                   t_int16 hiBoundP10) const;

    /**
     * Called by parse() to look for the text in the negative-number rule.
     * @param s The string to parse
     * @return 0 if the negative-number rule didn't match; otherwise, the value
     *         represented by the string.
     */
    double parseNegative(const UnicodeString& s) const;

    /**
     * Called by parse() to match the decimal rule.
     * @param s The string to parse.
     * @return 0 if the string doesn't match the decimal rule.  Otherwise, the value
     *         represented by the string.
     */
    double parseDecimal(const UnicodeString& s) const;

    /**
     * Used by parseDecimal() to parse the fractional part of the string.
     * @param s The string to parse.
     * @return The (fractional) value of the string.
     */
    double parseFractionalPart(const UnicodeString& s) const;

    //----------------------------------------------------------------------------
    // implementation functions for rule-description parsing
    //----------------------------------------------------------------------------

    /**
     * Called by the constructor to build the formatter's rule list.
     * @param description A string containing a textual description of the rules to use
     *                    to format numbers.  For information on the format of this
     *                    string, see the class description.
     * @param err         the error code.
     */
    void buildRuleList(const UnicodeString& description,
                       ErrorCode& err);

    /**
     * Fills in tempRuleList with a group of new NumberSpelloutRules, one for
     * each semicolon-delimited substring of "description".
     * @param description  A string containing a textual description of the rules to
     *                     use to format numbers.  For information on the format of
     *                     this string, see the class description.
     * @param tempRuleList the result rule list.
     * @param err          the error code.
     */
    void buildRawRuleList(const UnicodeString& description,
                          SpelloutRuleVector& tempRuleList,
                          ErrorCode& err);

    /**
     * If the rule text starts with a number, sets the rule's base value to that number
     * and removes the number from the rule text.  If the rule text doesn't start with
     * a number, sets the rule's base value to nextBaseValue.  Also handles the "-:" and
     * ".:" notation for the negative-number and decimal rules, and sets up the rule's
     * power of 10.
     * @param rule          the spell-out rule
     * @param nextBaseValue the base value of the next rule
     * @param err           the error code.
     */
    void parseBaseValue(NumberSpelloutRule& rule,
                        double& nextBaseValue,
                        ErrorCode& err);

    /**
     * If the rule contains an expression in brackets, splits it into two rules: one
     * without the bracketed text, and another one, with a base value one higher, that
     * does include the bracketed text.  The new rule is inserted right after the
     * original rule in ruleList.
     * @param rule     the spell-out rule
     * @param ruleList the rule list
     * @param lineNum  the number of rule in the list
     * @param err      the error code.
     */
    void parseBracketExpression(NumberSpelloutRule& rule,
                                SpelloutRuleVector& ruleList,
                                int lineNum,
                                ErrorCode& err);

    /**
     * Sets up the rule's substitutions by looking for the << and >> markers in the
     * rule text.  Removes the << and >> markers.
     * @param rule the spell-out rule
     */
    void parseSubstitutions(NumberSpelloutRule& rule);

    // Rule storage.
    // NOTE(review): presumably ruleList holds numRules entries and is filled in by
    // buildRuleList(); confirm against the implementation file.
    NumberSpelloutRule* ruleList;
    t_int16             numRules;
    NumberSpelloutRule* negativeNumberRule;
    NumberSpelloutRule* decimalRule;

    // NOTE(review): presumably the rule description used by the default
    // constructor (documented above as U.S. English); confirm in the implementation.
    static const UnicodeString DEFAULT_SPELLOUT_DESCRIPTION;

    // The address of this object is what getStaticClassID() returns as the class ID.
    static char fgClassID;
};
|
|
|
|
#endif
|