scummvm/common/tokenizer.h

/* ScummVM - Graphic Adventure Engine
 *
 * ScummVM is the legal property of its developers, whose names
 * are too numerous to list here. Please refer to the COPYRIGHT
 * file distributed with this source distribution.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 *
 */

#ifndef COMMON_TOKENIZER_H
#define COMMON_TOKENIZER_H

#include "common/scummsys.h"
#include "common/str.h"
#include "common/ustr.h"

namespace Common {

/**
 * @defgroup common_tokenizer String tokenizer
 * @ingroup common
 *
 * @brief String tokenizer for creating tokens out of parts of a string.
 *
 * @{
 */

/**
 * A simple non-optimized string tokenizer.
 *
 * Breaks a String into tokens separated by any of the given delimiter
 * characters. Tokens are fetched one at a time with nextToken(); empty()
 * reports whether any are left and reset() starts over.
 *
 * Example of use:
 * StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
 *
 * @note The string and delimiter members are const, so instances cannot be
 *       assigned to after construction (the implicit copy assignment
 *       operator is deleted).
 */
class StringTokenizer {
public:
	/**
	 * Creates a StringTokenizer.
	 * @param str The string to be tokenized.
	 * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
	 * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
	 */
	StringTokenizer(const String &str, const String &delimiters = " \t\r\n\f\v");
	void reset();       ///< Resets the tokenizer to its initial state
	bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
	String nextToken(); ///< Returns the next token from the string (or an empty string if there are no more tokens)

private:
	const String _str;        ///< The string to be tokenized (held by value, immutable after construction)
	const String _delimiters; ///< String containing all the delimiter characters (held by value, immutable after construction)
	uint         _tokenBegin; ///< Latest found token's begin (valid after a call to nextToken(), zero otherwise)
	uint         _tokenEnd;   ///< Latest found token's end (valid after a call to nextToken(), zero otherwise)
};

/**
 * A simple non-optimized unicode-string tokenizer.
 *
 * Counterpart of StringTokenizer that operates on a U32String and returns
 * U32String tokens. The delimiter set is still passed as a plain String.
 *
 * Example of use:
 * U32StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().
 * Non-ASCII characters are handled as well; prefer this class over StringTokenizer when the string contains unicode characters.
 *
 * @note The string and delimiter members are const, so instances cannot be
 *       assigned to after construction (the implicit copy assignment
 *       operator is deleted).
 */
class U32StringTokenizer {
public:
	/**
	 * Creates a U32StringTokenizer.
	 * @param str The unicode string to be tokenized.
	 * @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).
	 * @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.
	 */
	U32StringTokenizer(const U32String &str, const String &delimiters = " \t\r\n\f\v");
	void reset();       ///< Resets the tokenizer to its initial state, i.e. points both token iterators to the beginning
	bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise
	U32String nextToken(); ///< Returns the next token from the string (or an empty string if there are no more tokens)

private:
	const U32String _str;        ///< The unicode string to be tokenized (held by value, immutable after construction)
	const String    _delimiters; ///< String containing all the delimiter characters (held by value, immutable after construction)
	// NOTE(review): the two iterators below presumably refer into _str; if so, a
	// default copy of the tokenizer would carry iterators tied to the source
	// object's string - confirm against tokenizer.cpp.
	U32String::const_iterator            _tokenBegin; ///< Latest found token's begin iterator (valid after a call to nextToken())
	U32String::const_iterator            _tokenEnd;   ///< Latest found token's end iterator (valid after a call to nextToken())
};

/** @} */

} // End of namespace Common

#endif
Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00			`/* ScummVM - Graphic Adventure Engine`
			`*`
			`* ScummVM is the legal property of its developers, whose names`
			`* are too numerous to list here. Please refer to the COPYRIGHT`
			`* file distributed with this source distribution.`
			`*`
ALL: Update ScummVM project license to GPLv3+ 2021-12-26 17:47:58 +00:00			`* This program is free software: you can redistribute it and/or modify`
			`* it under the terms of the GNU General Public License as published by`
			`* the Free Software Foundation, either version 3 of the License, or`
			`* (at your option) any later version.`
Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00			`*`
			`* This program is distributed in the hope that it will be useful,`
			`* but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the`
			`* GNU General Public License for more details.`
			`*`
			`* You should have received a copy of the GNU General Public License`
ALL: Update ScummVM project license to GPLv3+ 2021-12-26 17:47:58 +00:00			`* along with this program. If not, see <http://www.gnu.org/licenses/>.`
COMMON: Make GPL headers consistent in themselves. 2014-02-18 01:34:18 +00:00			`*`
Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00			`*/`

			`#ifndef COMMON_TOKENIZER_H`
			`#define COMMON_TOKENIZER_H`

			`#include "common/scummsys.h"`
			`#include "common/str.h"`
GUI: U32: Add and use a U32Tokenizer - Added in common/tokenizer.cpp - Uses iterators to function 2020-07-21 20:44:39 +00:00			`#include "common/ustr.h"`
Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00
			`namespace Common {`

DOXYGEN: Add doxygen groups to header files in the common folder Adding @defgroup and @ingroup doxygen tags into all headers in the common folder that contain doxygen blocks. This improves the structure, readability, and findability of information in the resulting output. This commit targets purely structure and does not deal with the content of the currently existing doxygen documentation. 2020-07-08 21:30:36 +00:00			`/**`
			`* @defgroup common_tokenizer String tokenizer`
			`* @ingroup common`
			`*`
			`* @brief String tokenizer for creating tokens out of parts of a string.`
			`*`
			`* @{`
			`*/`

Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00			`/**`
			`* A simple non-optimized string tokenizer.`
			`*`
			`* Example of use:`
			`* StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().`
			`*/`
			`class StringTokenizer {`
			`public:`
			`/**`
			`* Creates a StringTokenizer.`
			`* @param str The string to be tokenized.`
			`* @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).`
			`* @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.`
			`*/`
			`StringTokenizer(const String &str, const String &delimiters = " \t\r\n\f\v");`
			`void reset(); ///< Resets the tokenizer to its initial state`
			`bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise`
			`String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)`

			`private:`
			`const String _str; ///< The string to be tokenized`
			`const String _delimiters; ///< String containing all the delimiter characters`
			`uint _tokenBegin; ///< Latest found token's begin (Valid after a call to nextToken(), zero otherwise)`
			`uint _tokenEnd; ///< Latest found token's end (Valid after a call to nextToken(), zero otherwise)`
			`};`

GUI: U32: Add and use a U32Tokenizer - Added in common/tokenizer.cpp - Uses iterators to function 2020-07-21 20:44:39 +00:00			`/**`
			`* A simple non-optimized unicode-string tokenizer.`
			`*`
			`* Example of use:`
			`* U32StringTokenizer("Now, this is a test!", " ,!") gives tokens "Now", "this", "is", "a" and "test" using nextToken().`
			`* Using non-ascii chars will also work, and is recommended to use this over StringTokenizer if string contains unicode chars.`
			`*/`
			`class U32StringTokenizer {`
			`public:`
			`/**`
			`* Creates a U32StringTokenizer.`
			`* @param str The unicode string to be tokenized.`
			`* @param delimiters String containing all the delimiter characters (i.e. the characters to be ignored).`
			`* @note Uses space, horizontal tab, carriage return, newline, form feed and vertical tab as delimiters by default.`
			`*/`
			`U32StringTokenizer(const U32String &str, const String &delimiters = " \t\r\n\f\v");`
			`void reset(); ///< Resets the tokenizer to its initial state, i.e. points both token iterators to the beginning`
			`bool empty() const; ///< Returns true if there are no more tokens left in the string, false otherwise`
			`U32String nextToken(); ///< Returns the next token from the string (Or an empty string if there are no more tokens)`

			`private:`
			`const U32String _str; ///< The unicode string to be tokenized`
			`const String _delimiters; ///< String containing all the delimiter characters`
			`U32String::const_iterator _tokenBegin; ///< Latest found token's begin iterator (Valid after a call to nextToken())`
			`U32String::const_iterator _tokenEnd; ///< Latest found token's end iterator (Valid after a call to nextToken())`
			`};`
DOXYGEN: Add doxygen groups to header files in the common folder Adding @defgroup and @ingroup doxygen tags into all headers in the common folder that contain doxygen blocks. This improves the structure, readability, and findability of information in the resulting output. This commit targets purely structure and does not deal with the content of the currently existing doxygen documentation. 2020-07-08 21:30:36 +00:00
			`/** @} */`

Move StringTokenizer to its own files (tokenizer.h/tokenizer.cpp). svn-id: r48310 2010-03-20 12:56:56 +00:00			`} // End of namespace Common`

			`#endif`