|
//**************************************************************************************************************************
//* Blue - General Purpose C++ Library
//* Copyright (c) 2002-2003 Josh Harler
//*
//* This software is provided 'as-is', without any express or implied warranty. In no event
//* will the authors be held liable for any damages arising from the use of this software.
//*
//* Permission is granted to anyone to use this software for any purpose, including commercial
//* applications, and to alter it and redistribute it freely, subject to the following restrictions:
//*
//* 1. The origin of this software must not be misrepresented; you must not claim that you
//* wrote the original software. If you use this software in a product, an acknowledgment in the
//* product documentation would be appreciated but is not required.
//*
//* 2. Altered source versions must be plainly marked as such, and must not be misrepresented as
//* being the original software.
//*
//* 3. This notice may not be removed or altered from any source distribution.
//*
//*
//* file Util/StringTokenizer.h
//**
#ifndef __blue_util_StringTokenizer_h_included__
#define __blue_util_StringTokenizer_h_included__
// Public Headers ==========================================================================================================
#include "Blue.h"
#include "Array.h"
// Public Defines/Enums/Typedefs/Etc. ======================================================================================
// Public Classes/Structs ==================================================================================================
namespace blue {
namespace util {
/**
* \class StringTokenizer
* \brief Tokenizes a string.
* \ingroup Util
*
* Complex strings can be tokenized fairly easily using containers. If
* the string shouldn't be delimited by items appearing in quotes (""),
* then quotes should be added as a container. Take the following string
* for example:
*
* <code>Text "quoted text" more text</code>
*
* If that string was tokenized on spaces (" "), the results would be
* this:
*
* <code> "Text" - ""quoted" - "text"" - "more" - "text" </code>
*
* Odds are, this isn't what was wanted. To ignore everything inside of
* the quotes, add them as exclusive containers. Once that is done, the
* results on tokenizing would be this:
*
* <code> "Text" - ""quoted text"" - "more" - "text" </code>
*
* This result is probably what was wanted.
*
* A container can be one of two types. It can be exclusive, which
* means everything after the container is ignored until the container
* ends. If a container is not exclusive, then it is normal. This means
* that other containers can appear between the beginning and end of
* the container.
*
* The following examples show how containers can be used to change
* the results of tokenizing a string. The string being tokenized
* is: <code>Text "quoted (parens ")")" text</code>.
*
* Parsing using normal containers:
* <code>
* <br>container: begin " end " NORMAL
* <br>container: begin ( end ) NORMAL
* <br>tokenized: "Text" - "quoted (parens ")")" - "text"
* <br>
* </code>
*
* Parsing using exclusive containers:
* <code>
* <br>container: begin " end " EXCLUSIVE
* <br>container: begin ( end ) EXCLUSIVE
* <br>tokenized: "Text" - "quoted (parens ")")" - "text"
* <br>
* </code>
*
* Parsing using a combination of exclusive and normal containers:
* <code>
* <br>container: begin " end " NORMAL
* <br>container: begin ( end ) EXCLUSIVE
* <br>tokenized: "Text" - ""quoted (parens ")")" text"
* <br>
* </code>
*
* <code>
* <br>container: begin " end " EXCLUSIVE
* <br>container: begin ( end ) NORMAL
* <br>tokenized: "Text" - ""quoted (parens ")")"" - "text"
* <br>
* </code>
*
*/
class StringTokenizer
{
public:
// ===========================================================
// creation/destruction
// ===========================================================
/**
* Default constructor.
*
* NOTE(review): presumably the tokenizer starts with no containers
* registered; confirm against the implementation file.
*/
StringTokenizer();
/**
* Copy constructor.
*
* \param copy The tokenizer to copy from.
*
* NOTE(review): no copy-assignment operator is declared next to this
* copy constructor, so the compiler-generated one is what callers get.
*/
StringTokenizer( const StringTokenizer& copy );
/** Destructor (non-virtual). */
~StringTokenizer();
// ===========================================================
// query
// ===========================================================
/**
* Flags used when tokenizing the string.
*
* Each flag occupies its own bit, so several flags can be combined with
* bitwise OR. Note that tokenize() accepts them as a plain flags32_t
* rather than as flags_e.
*
* NOTE(review): the trailing comma after the last enumerator is only
* valid from C++11 onward; strict C++98/03 compilers may reject it.
*/
enum flags_e
{
KEEP_DELIMITERS = (1<<0), //!< Returns the delimiters as well as everything in between
TRIM_RESULTS = (1<<1), //!< Trims the items that fall between the delimiters
NO_WHITESPACE = (1<<2), //!< Does not add all-whitespace tokens (still returns whitespace delimiters)
};
/**
* Tokenizes the given string on the given delimiters using the given flags.
*
* \param str The string to tokenize (taken by value).
* \param delimiter The delimiter(s) to split on (taken by value).
* NOTE(review): whether this is treated as one multi-character
* delimiter or as a set of single-character delimiters is not
* visible in this header; confirm in the implementation.
* \param flags Bitwise OR of flags_e values. Defaults to 0, i.e. no flags set.
* \return The resulting tokens as an array of strings.
*
* This is a non-const member function. Containers registered with
* addContainer() presumably influence where the string is split, as
* described in the class documentation.
*/
Array<BString> tokenize( BString str, BString delimiter, flags32_t flags = 0 );
// ===========================================================
// manipulation
// ===========================================================
/**
* Describes how a container interacts with other containers.
*
* NOTE(review): the trailing comma after the last enumerator is only
* valid from C++11 onward; strict C++98/03 compilers may reject it.
*/
enum container_type_e
{
EXCLUSIVE = 1, //!< No other containers are looked for until this one closes
NORMAL = 2, //!< Other containers may occur within this container
};
/**
* Adds a container to use when tokenizing.
*
* \param begin The text that opens the container.
* \param end The text that closes the container.
* \param type Whether the container is EXCLUSIVE or NORMAL.
*
* NOTE(review): presumably the container is appended to m_containers
* and applies to every later tokenize() call; confirm in the
* implementation file.
*/
void addContainer( BString begin, BString end, container_type_e type );
private:
/** Describes a single container: a begin string, an end string and a type. */
struct container
{
BString m_beg; //!< Begin marker (presumably the \c begin argument of addContainer())
BString m_end; //!< End marker (presumably the \c end argument of addContainer())
container_type_e m_type; //!< EXCLUSIVE or NORMAL
};
Array<container> m_containers; //!< Container descriptions held by this tokenizer
};
}} // namespaces
// Public External Variables ===============================================================================================
// Public Function Prototypes ==============================================================================================
// Public Inline Functions =================================================================================================
#endif // include guard
|
By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.
If a file you wish to view isn't highlighted, and is a text file (not binary), please
let us know and we'll add colourisation support for it.
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.