1998-05-27 02:02:27 +00:00

419 lines
18 KiB
C++

/*
* source/format/spellfmt.h, international, international, 971113b 97/10/30
*
* (C) Copyright Taligent, Inc. 1996 - All Rights Reserved
* (C) Copyright IBM Corp. 1996 - All Rights Reserved
*
* Portions copyright (c) 1996-1997 Sun Microsystems, Inc. All Rights Reserved.
*
* The original version of this source code and documentation is copyrighted
* and owned by Taligent, Inc., a wholly-owned subsidiary of IBM. These
* materials are provided under terms of a License Agreement between Taligent
* and Sun. This technology is protected by multiple US and International
* patents. This notice and attribution to Taligent may not be removed.
* Taligent is a registered trademark of Taligent, Inc.
*
* Permission to use, copy, modify, and distribute this software
* and its documentation for NON-COMMERCIAL purposes and without
* fee is hereby granted provided that this copyright notice
* appears in all copies. Please refer to the file "copyright.html"
* for further important copyright and licensing information.
*
* SUN MAKES NO REPRESENTATIONS OR WARRANTIES ABOUT THE SUITABILITY OF
* THE SOFTWARE, EITHER EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED
* TO THE IMPLIED WARRANTIES OF MERCHANTABILITY, FITNESS FOR A
* PARTICULAR PURPOSE, OR NON-INFRINGEMENT. SUN SHALL NOT BE LIABLE FOR
* ANY DAMAGES SUFFERED BY LICENSEE AS A RESULT OF USING, MODIFYING OR
* DISTRIBUTING THIS SOFTWARE OR ITS DERIVATIVES.
*
*/
#ifndef _SPELLFMT
#define _SPELLFMT
#include "ptypes.h"
#include "numfmt.h"
// Forward declarations. This header refers to NumberSpelloutRule only through
// pointers and references, and to SpelloutRuleVector only through references,
// so their full definitions are not required here.
struct NumberSpelloutRule;
class SpelloutRuleVector;
/**
* A class that spells out a numeric value in words (i.e., 123.45 becomes "one hundred
* twenty-three point four five"). You tell the NumberSpelloutFormat how to spell out
* numbers by passing its constructor a rule description string that it uses to build
* a rule list, which is in turn used to format and parse numbers.
* <P>
* The rule description language works as follows. Number spellout is specified through
* the use of an ordered list of rules, each of which has:
* <ul>
* <li>A <i>base value</i> that controls which numbers the rule is used for (a rule
* applies to the range from its base value to the next rule's base value minus one).
* <li>A <i>power of 10</i> that controls how the substitutions behave. Normally this
* is the base value's common log, but it can be lower.
* <li><i>Rule text,</i> which forms the basis of the format() function's return value.
* <li>An optional <i>major substitution,</i> which specifies the position where text is
* to be inserted into the result string. The inserted text is the string you get from
* using this rule set to format the value being formatted / the rule's power of 10.
* For example, if you use "100: &lt;&lt; hundred >>;" to format 234, the major substitution
* value is 2, and "two" gets inserted where the &lt;&lt; is.
* <li>An optional <i>minor substitution,</i> which also specifies a position where text
* is to be inserted into the result string. The inserted text is the string you get
* from using this rule set to format the value being formatted % the rule's power of 10.
* For example, if you use "100: &lt;&lt; hundred >>;" to format 234, the minor substitution
* value is 34, and "thirty-four" gets inserted where the >> is.
* </ul>
* In the description string, rules are separated by semicolons, and leading whitespace is
* ignored. The rule's base value may precede its text and is separated from it by a
* colon (you can include commas or periods for readability, but they're ignored). A
* > between the base value and the colon reduces the rule's power of 10 by one.
* <P>
* Within the rule text, &lt;&lt; marks the position of the major substitution, and >> marks
* the position of the minor substitution. The rule text may include optional text in
* brackets. This text is only included when the minor substitution value is not zero
* (the minor substitution itself is usually included in the brackets).
* <P>
* The rule description may also include a <i>negative number rule,</i> which specifies
* how to format negative numbers. The negative number rule begins with "-:" instead of
* a base value, and the minor substitution tells where to put the result of formatting
* the number's absolute value.
* <P>
* The rule description may also include a <i>decimal rule,</i> which specifies how
* to format numbers that have fractional parts. The decimal rule begins with ".:"
* instead of a base value, the major substitution is replaced with the number's
* integral part, and the minor substitution is replaced by the number's fractional
* part, spelled out digit-by-digit.
* <P>
* The bracket notation actually expands into two rules in the rule list : one that doesn't
* include the stuff in the brackets, and one with one-higher base value that does. In
* other words,
* <pre>
* . 20:twenty[->>];
* . turns into
* . 20:twenty;
* . 21:twenty->>;
* . and
* . 100:&lt;&lt;hundred[>>];
* . turns into
* . 100:&lt;&lt;hundred;
* . 101:&lt;&lt;hundred>>;
* . To get an idea of how this works, the rules for U.S. English are as follows:
* . zero;one;two;three;four;five;six;seven;eight;nine;
* . ten;eleven;twelve;thirteen;fourteen;fifteen;sixteen;seventeen;eighteen;nineteen;
* . twenty[->>];
* . 30:thirty[->>];
* . 40:forty[->>];
* . 50:fifty[->>];
* . 60:sixty[->>];
* . 70:seventy[->>];
* . 80:eighty[->>];
* . 90:ninety[->>];
* . 100:&lt;&lt;hundred[>>];
* . 1000:&lt;&lt;thousand[>>];
* . 1000000:&lt;&lt;million[>>];
* . 1000000000:&lt;&lt;billion[>>];
* . 1000000000000:&lt;&lt;trillion[>>];
* . 1000000000000000:OUT OF RANGE!
* </pre>
* @see NumberFormat
* @version 1.22 9/17/97
* @author Richard Gillam
*/
class T_FORMAT_API NumberSpelloutFormat : public NumberFormat {
public:
//----------------------------------------------------------------------------
// construction, destruction, comparison and cloning
//----------------------------------------------------------------------------
/**
* Constructs a NumberSpelloutFormat that formats and parses numbers according
* to the default rule set (U.S. English).
*/
NumberSpelloutFormat();
/**
* Constructs a NumberSpelloutFormat that formats and parses numbers according to
* the rules specified in "description".
* @param description A string containing a textual description of the rules to use
* to format numbers. For information on the format of this string, see the class
* description.
* @param err the error code.
*/
NumberSpelloutFormat(const UnicodeString& description,
ErrorCode& err);
/**
* Copy constructor.
* @param that the object to copy from.
*/
NumberSpelloutFormat(const NumberSpelloutFormat& that);
/**
* Destructor.
*/
~NumberSpelloutFormat();
/**
* Overrides operator==; compares this formatter with obj.
* @param obj the object to be compared with.
* @return TRUE if obj is the same as this, FALSE otherwise.
*/
virtual t_bool operator==(const Format& obj) const;
/**
* Overrides Cloneable; creates an instance that is identical to this one.
* @return the created instance.
*/
virtual Format* clone() const;
//----------------------------------------------------------------------------
// formatting
//----------------------------------------------------------------------------
/**
* Formats a double number using this NumberSpelloutFormat instance and
* copies the result to the output buffer.
* @param number the double number to be formatted.
* @param output the result buffer.
* @return the result buffer.
*/
UnicodeString& format( double number,
UnicodeString& output) const;
/**
* Formats a long number using this NumberSpelloutFormat instance and
* copies the result to the output buffer.
* @param number the long number to be formatted.
* @param output the result buffer.
* @return the result buffer.
*/
UnicodeString& format( long number,
UnicodeString& output) const;
/**
* Appends a string representing "number" spelled out in words (according to this
* format's rule list) to the end of toAppendTo.
* @param number The number to format.
* @param toAppendTo The UnicodeString to append the result to.
* @param pos Ignored on input. Set to point to the whole range covered by the
* formatted number on output.
* @return toAppendTo
*/
virtual UnicodeString& format(double number,
UnicodeString& toAppendTo,
FieldPosition& pos) const;
/**
* Appends a string representing "number" spelled out in words (according to this
* format's rule list) to the end of toAppendTo.
* @param number The number to format.
* @param toAppendTo The UnicodeString to append the result to.
* @param pos Ignored on input. Set to point to the whole range covered by the
* formatted number on output.
* @return toAppendTo
*/
virtual UnicodeString& format(long number,
UnicodeString& toAppendTo,
FieldPosition& pos) const;
/*Added in order not to hide the superclass implementation [Bertrand A. D. 01/20/98]*/
virtual UnicodeString& format(const Formattable&,UnicodeString&,FieldPosition&,ErrorCode&) const;
/*end of update [Bertrand A. D. 01/20/98]*/
//----------------------------------------------------------------------------
// parsing
//----------------------------------------------------------------------------
/**
* Parses "text" and stores the value it represents in "result".
* @param text The string to parse.
* @param result The value represented by the string. If possible, this will be an
* instance of Long; otherwise, it will be an instance of Double.
* @param status the error code status.
*/
virtual void parse( const UnicodeString& text,
Formattable& result,
ErrorCode& status) const;
/**
* Parses "text" and stores the value it represents in "result".
* @param text The string to parse.
* @param result The value represented by the string. If possible, this will be an
* instance of Long; otherwise, it will be an instance of Double.
* @param parsePosition On entry, specifies the position in the string to begin parsing at.
* The formatted number is expected to run from this position to the end of the
* string. On exit, if the parse succeeded, this will point to the string's past-the-end
* position. If the parse failed, it will have been left unchanged.
*/
virtual void parse(const UnicodeString& text,
Formattable& result,
ParsePosition& parsePosition) const;
//----------------------------------------------------------------------------
// class identification (hand-rolled RTTI)
//----------------------------------------------------------------------------
/**
* Return the class ID for this class. This is useful only for
* comparing to a return value from getDynamicClassID(). For example:
* <pre>
* . Base* polymorphic_pointer = createPolymorphicObject();
* . if (polymorphic_pointer->getDynamicClassID() ==
* . Derived::getStaticClassID()) ...
* </pre>
* The ID is the address of the static member fgClassID.
* @return The class ID for all objects of this class.
*/
static ClassID getStaticClassID() { return (ClassID)&fgClassID; }
/**
* Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
* This method is to implement a simple version of RTTI, since not all
* C++ compilers support genuine RTTI. Polymorphic operator==() and
* clone() methods call this method.
*
* @return The class ID for this object. All objects of a
* given class have the same class ID. Objects of
* other classes have different class IDs.
*/
virtual ClassID getDynamicClassID() const { return getStaticClassID(); }
private:
// Copy assignment is declared private, so it is not accessible to client code.
NumberSpelloutFormat& operator=(const NumberSpelloutFormat&);
//----------------------------------------------------------------------------
// implementation of formatting algorithm
//----------------------------------------------------------------------------
/**
* The primary body of the formatting algorithm.
* @param x The value to format.
* @param result The UnicodeString into which to insert the result.
* @param pos The position in "result" where the result should be inserted.
*/
void doFormat(double x,
UnicodeString& result,
TextOffset pos) const;
/**
* The body of the formatAsDigits() algorithm.
* @param x The value to format.
* @param result The UnicodeString into which to insert the result.
* @param pos The position in "result" where the result is to be inserted.
*/
void doFormatAsDigits(double x,
UnicodeString& result,
TextOffset pos) const;
/**
* Returns the number of digits after the decimal point in a double number x.
* @param x the double number
* @return the number of digits after the decimal point.
*/
static int digitsAfterDecimal(double x);
//----------------------------------------------------------------------------
// implementation of parsing algorithm
//----------------------------------------------------------------------------
/**
* The main body of the parse algorithm.
* @param s The string to parse.
* @param startAt The position of the first character to consider. Parsing
* proceeds from startAt to the <i>beginning</i> of the string.
* @param pos On exit, this is filled in with the position of the first character in
* s that was not matched by this call.
* @param endWithSub If true, match only rules that end with a substitution. If false,
* match only rules that <i>don't</i> end with a substitution.
* @param loBoundP10 Match only rules with a power of 10 greater than or equal to
* this value.
* @param hiBoundP10 Match only rules with a power of 10 less than or equal to this value.
* @return -1 as the error value, otherwise, the parsed value.
*/
double doParse(const UnicodeString& s,
TextOffset startAt,
ParsePosition& pos,
t_bool endWithSub,
t_int16 loBoundP10,
t_int16 hiBoundP10) const;
/**
* Called by parse() to look for the text in the negative-number rule.
* @param s The string to parse
* @return 0 if the negative-number rule didn't match; otherwise, the value
* represented by the string.
*/
double parseNegative(const UnicodeString& s) const;
/**
* Called by parse() to match the decimal rule.
* @param s The string to parse.
* @return 0 if the string doesn't match the decimal rule. Otherwise, the value
* represented by the string.
*/
double parseDecimal(const UnicodeString& s) const;
/**
* Used by parseDecimal() to parse the fractional part of the string.
* @param s The string to parse.
* @return The (fractional) value of the string.
*/
double parseFractionalPart(const UnicodeString& s) const;
//----------------------------------------------------------------------------
// implementation functions for rule-description parsing
//----------------------------------------------------------------------------
/**
* Called by the constructor to build the formatter's rule list.
* @param description A string containing a textual description of the rules to use
* to format numbers. For information on the format of this string, see the class
* description.
* @param err the error code.
*/
void buildRuleList(const UnicodeString& description,
ErrorCode& err);
/**
* Fills in tempRuleList with a group of new NumberSpelloutRules, one for
* each semicolon-delimited substring of "description".
* @param description A string containing a textual description of the rules to use
* to format numbers. For information on the format of this string, see the class
* description.
* @param tempRuleList the result rule list.
* @param err the error code.
*/
void buildRawRuleList(const UnicodeString& description,
SpelloutRuleVector& tempRuleList,
ErrorCode& err);
/**
* If the rule text starts with a number, sets the rule's base value to that number
* and removes the number from the rule text. If the rule text doesn't start with
* a number, sets the rule's base value to nextBaseValue. Also handles the "-:" and
* ".:" notation for the negative-number and decimal rules, and sets up the rule's
* power of 10.
* @param rule the spell-out rule
* @param nextBaseValue the base value of the next rule (passed by non-const
* reference, so the callee can modify it)
* @param err the error code.
*/
void parseBaseValue(NumberSpelloutRule& rule,
double& nextBaseValue,
ErrorCode& err);
/**
* If the rule contains an expression in brackets, splits it into two rules: one
* without the bracketed text, and another one, with a base value one higher, that
* does include the bracketed text. The new rule is inserted right after the
* original rule in ruleList.
* @param rule the spell-out rule
* @param ruleList the rule list
* @param lineNum the number of the rule in the list
* @param err the error code.
*/
void parseBracketExpression(NumberSpelloutRule& rule,
SpelloutRuleVector& ruleList,
int lineNum,
ErrorCode& err);
/**
* Sets up the rule's substitutions by looking for the &lt;&lt; and >> markers in the
* rule text. Removes the &lt;&lt; and >> markers.
* @param rule the spell-out rule
*/
void parseSubstitutions(NumberSpelloutRule& rule);
//----------------------------------------------------------------------------
// data members
//----------------------------------------------------------------------------
// The formatter's rule list (see buildRuleList()).
// NOTE(review): presumably an owned array of numRules entries -- confirm in the .cpp.
NumberSpelloutRule* ruleList;
// NOTE(review): presumably the number of entries in ruleList -- confirm in the .cpp.
t_int16 numRules;
// NOTE(review): presumably the rule introduced by "-:" in the description, if any -- confirm.
NumberSpelloutRule* negativeNumberRule;
// NOTE(review): presumably the rule introduced by ".:" in the description, if any -- confirm.
NumberSpelloutRule* decimalRule;
// NOTE(review): presumably the U.S. English rule description used by the default
// constructor -- confirm in the .cpp.
static const UnicodeString DEFAULT_SPELLOUT_DESCRIPTION;
// Its address serves as this class's ID; see getStaticClassID().
static char fgClassID;
};
#endif