Click here to Skip to main content
15,895,667 members
Articles / Programming Languages / C

A Tiny Variable String Splitter

Rate me:
Please Sign up or sign in to vote.
4.38/5 (10 votes)
25 Jan 2008 | CPOL | 8 min read | 32.7K | 221 | 16
Tokenize and access string contents using a format mask
//
//  File:           StringSplitter.cpp
//
//  Author:         Michael Stellmann
//  Copyright:      2008 by Michael Stellmann
//
//  Description:    Extract and access placeholders from a string
//                  using a format mask.
//
//                  WARNING: No additional error checking is done!
//
//  Last modified:  2008-01-21
//
//  Modification history: 
//  2008-01-21		MS			1.0		Initial release
//

#include "stdafx.h"
#include "StringSplitter.h"

// Construct a searchable string from pszString.
//
// The text is duplicated into a private buffer (pszString_) so that the
// findNext() functions are free to write into it temporarily. The length
// of the text, without the terminator, is kept in nLen_.
// pszString is not checked for 0.
CSearchString::CSearchString( const TCHAR *pszString )
{
	nLen_ = _tcslen( pszString );

	// one extra character for the terminating zero
	const size_t nBufferSize = nLen_ + 1;
	pszString_ = new TCHAR[ nBufferSize ];
	_tcscpy_s( pszString_, nBufferSize, pszString );

	// start searching at the beginning of the copy
	findReset();
}

// Release the private copy of the string that the constructor
// allocated with new[].
CSearchString::~CSearchString()
{
	delete[] pszString_;
}

// Rewind the search: the next findNext() call starts at the first
// character again. EndPos_ is set to the terminating zero of the string.
void CSearchString::findReset()
{
	EndPos_ = pszString_ + _tcslen( pszString_ );
	SearchPos_ = pszString_;
}

// Advance the search to the next occurrence of the character cFind.
//
// The text between the (possibly advanced) start position and the match -
// or the remaining text when there is no further match - is stored in
// strLastPart_. When the match sits exactly at the start position,
// strLastPart_ is left unchanged.
//
// Returns the new search position:
//  - a pointer to the matching character if cFind was found,
//  - EndPos_ if cFind was not found and the search did not start at EndPos_,
//  - 0 if cFind was not found and the search started at EndPos_, or if the
//    search position was already 0 (nothing is done in that case).
const TCHAR *CSearchString::findNext( TCHAR cFind )
{
	if ( SearchPos_ )
	{
		// *** Start condition
		// we always search from the next possible position except when
		// - we start searching and first character is not cFind or
		// - we are at the end
		if ( !( SearchPos_ == pszString_ && *SearchPos_ != cFind ) && SearchPos_ != EndPos_ ) ++SearchPos_;


		// search for cFind, starting at the (possibly advanced) position
		TCHAR *FoundPos = _tcschr( SearchPos_, cFind );

		// avoid wildcard on first character (there's nothing to copy yet);
		// strLastPart_ keeps its previous contents in that case
		if ( FoundPos != SearchPos_ )
		{
			if ( FoundPos )
			{
				// if found, store last part between the last found character and the current:
				// terminate the buffer at the match, copy, then restore the character
				TCHAR c = *FoundPos;
				*FoundPos = _T('\0');
				strLastPart_ = SearchPos_;
				*FoundPos = c;
			} else {
				// not found: everything up to the end of the string is the last part
				strLastPart_ = SearchPos_;
			}
		}

		// *** End condition (FoundPos == 0 )
		if ( FoundPos == 0 )
		{
			// if we searched from the end, terminate
			// otherwise search again from last character
			if ( SearchPos_ == EndPos_ )
			{
				// FoundPos is 0 here, so the search position becomes 0
				SearchPos_ = FoundPos;
			} else {
				SearchPos_ = EndPos_;
			}
		} else {
			// continue from the match on the next call
			SearchPos_ = FoundPos;
		}
	}

	return SearchPos_;
}

// Find the next occurrence of the substring pszFind at or after the
// current search position.
//
// The text between the search position and the match (or the whole
// remaining text if there is no match) is stored in strLastPart_.
// On a match the search position moves just behind the matched substring.
// On a miss it moves to EndPos_; a miss that already started at EndPos_
// sets it to 0, which marks the search as exhausted.
//
// Returns the offset of the match relative to the start of the string,
// or -1 if pszFind was not found, if pszFind is 0, or if the search is
// already exhausted (in the latter two cases nothing is modified).
int CSearchString::findNext( const TCHAR *pszFind )
{
	// A previous miss at the end of the string leaves SearchPos_ at 0;
	// passing a null pointer to _tcsstr would crash, so bail out the same
	// way the single-character overload does.
	if ( !SearchPos_ || !pszFind ) return -1;

	TCHAR *FoundPos = _tcsstr( SearchPos_, pszFind );

	if ( !FoundPos )
	{
		// no match: the whole remainder is the last part
		strLastPart_ = SearchPos_;

		// if we searched from the end, terminate
		// otherwise continue from the end on the next call
		if ( SearchPos_ == EndPos_ )
		{
			SearchPos_ = 0;
		} else {
			SearchPos_ = EndPos_;
		}

		return -1;
	}

	// match: temporarily terminate the buffer at the match so that the
	// text in front of it can be copied, then restore the character
	const TCHAR c = *FoundPos;
	*FoundPos = _T('\0');
	strLastPart_ = SearchPos_;
	*FoundPos = c;

	// continue behind the matched substring on the next call
	SearchPos_ = FoundPos + _tcslen( pszFind );

	return static_cast<int>( FoundPos - pszString_ );
}

// Return the text that the most recent findNext() call stored in
// strLastPart_ (the part in front of the match, or the remaining text
// when nothing was found).
const TCHAR * CSearchString::getLastPart()
{
	return strLastPart_;
}

// Tell whether the very first character of the string is cFind.
// For an empty string the first character is the terminating zero.
bool CSearchString::startsWithPlaceholder( TCHAR cFind )
{
	return pszString_[ 0 ] == cFind;
}

// Return the current search position inside the private string copy.
// This is 0 once a findNext() call has missed while already positioned
// at the end of the string.
const TCHAR * CSearchString::getSearchPos()
{
	return SearchPos_;
}

bool CStringSplitter::matchPatterns( const TCHAR *pszSource, const TCHAR *pszPattern )
{
	// first splice pattern
	CSearchString Pattern( pszPattern );
	SSearchPattern pat;
	vector<SSearchPattern> vPattern;

	// create array of parts
	// we store both, placeholder-name and fix contents in strPlaceholderName
	pat.bIsPlaceholder = Pattern.startsWithPlaceholder( _T('%') );
	while ( Pattern.findNext( _T('%') ) )
	{
		pat.strPlaceholderName = Pattern.getLastPart();
		vPattern.push_back( pat );
		pat.bIsPlaceholder = !pat.bIsPlaceholder;
	}

	size_t i,
		nPatterns = vPattern.size();

	CSearchString String( pszSource );
//	vector<CString> vstrParts;
	vSplitResult_.clear();
	SSplitResult SplitResult;
	SSearchPattern *pPat;
	int nFoundOffset;
	for ( i = 0; i < nPatterns; ++i )
	{
		pPat = &vPattern[ i ];
		if ( !pPat->bIsPlaceholder )
		{
			const TCHAR *pszFixPart = pPat->strPlaceholderName;
			nFoundOffset = String.findNext( pszFixPart );

			if ( nFoundOffset == -1 ) 
			{
				vSplitResult_.clear();
				return false;
			}

			// if search patters was found, add it
			if ( nFoundOffset )
			{
				SplitResult.strPlaceholderName = vPattern[ i - 1 ].strPlaceholderName;
				SplitResult.strValue = String.getLastPart();
				vSplitResult_.push_back( SplitResult ); // Placeholder
			}
			SplitResult.strPlaceholderName = _T("");
			SplitResult.strValue = pszFixPart;
			vSplitResult_.push_back( SplitResult ); // fixed part
		}
	}

	// *** end condition: last part was wildcard
	if ( pPat->bIsPlaceholder )
	{
		SplitResult.strPlaceholderName = vPattern[ i - 1 ].strPlaceholderName;
		SplitResult.strValue = String.getSearchPos();
		vSplitResult_.push_back( SplitResult ); // wildcard
	}

	return true;
}

// Look up the value that the last matchPatterns() call stored for the
// placeholder pszPlaceholderName. Names are compared with _tcsicmp, i.e.
// without regard to case, and the first matching entry wins.
//
// strValue receives the value, or an empty string if there is no entry
// with that name. Returns true if an entry was found, false otherwise.
bool CStringSplitter::getValue( OUT CString &strValue, const TCHAR *pszPlaceholderName )
{
	for ( tvSplitResult::const_iterator it = vSplitResult_.begin(); it != vSplitResult_.end(); ++it )
	{
		if ( _tcsicmp( it->strPlaceholderName, pszPlaceholderName ) == 0 )
		{
			strValue = it->strValue;
			return true;
		}
	}

	// placeholder was not found: hand back an empty value
	strValue = _T("");
	return false;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Germany Germany
This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Comments and Discussions