Click here to Skip to main content
13,291,690 members (58,220 online)
Click here to Skip to main content


36 bookmarked
Posted 15 Jun 2002

A Multiple Substring Search Class: CIVStringSet

, 25 Apr 2010
A string array class using MFC or STL that performs very fast multiple string searches
// StringSet.H: interface for the CIVStringSet class.
// Written 30 June 2002 by Scot T Brennecke
// Thanks to Moishe Halibard and Moshe Rubin for their article,
//    "A Multiple Substring Search Algorithm" in the June 2002
//    edition of C/C++ Users Journal.  This class is based on
//    the algorthim therein described, but extended to return
//    all strings and use MFC classes.
// Modified 24 Apr 2010 by Scot T Brennecke

#pragma once

#pragma warning(disable: 4100 4786)
#include <set>
#include <vector>
#include <string>
#include <list>

#ifdef _UNICODE
typedef std::wstring _tstring ;
typedef std::string _tstring ;

struct SEntry
    SEntry( unsigned long dwState = 0UL, unsigned long dwIndex = 0UL )
        : m_dwState( dwState ), m_dwIndex( dwIndex ) {}

    unsigned long m_dwState ;
    unsigned long m_dwIndex ;
} ;

class CIVStringSet : public std::vector<_tstring>
    CIVStringSet( unsigned long dwInitialWidth = 64 ) ;  // Initial width of FSM
    virtual ~CIVStringSet() ;

    bool        Add( LPCTSTR pszWord ) ;                         // a single word
    bool        Add( const _tstring & rstrWord ) ;               // a single word
    size_t      Add( LPCTSTR pszWords, LPCTSTR pszDelims ) ;     // multiple words, delimited with chars from pszDelims
    size_t      Add( LPCTSTR pszzWords, size_t nWords ) ;        // nWords words, each 0 term'd, with extra 0 at end
    size_t      Add( const std::set<_tstring> & sstrWords ) ;    // all the elements of a set of strings
    size_t      Add( const std::vector<_tstring> & astrWords ) ; // all the elements of an array of strings
    size_t      Add( const std::list<_tstring> & lstrWords ) ;   // all the elements of a list of strings

    void        SetDelims( LPCTSTR pszDelims ) { m_strDelims = pszDelims ; } // delimiters to use in word search
    UINT        FindFirstIn( const _tstring & strText, size_t & rnFirst ) ;  // Begin iteration
    UINT        FindNext( size_t & rnNext ) ;                                // Continue iteration

    typedef std::pair<size_t,UINT>              CWordPosPair ;     // first is index of word in array, second is position in text
    typedef std::list< std::pair<size_t,UINT> > CWordPosPairList ; // list of pairs to be returned by FindAllIn
    size_t      FindAllIn( _tstring strText, CWordPosPairList & rlstrWords ) ; // Iterate all at once and make list
    std::vector< std::vector< SEntry > > m_apnFSM ;   // Finite State Machine.
    unsigned long m_dwMaxUsedState ;  // largest state value used
    _tstring      m_strSearch ;       // Current search string
    _tstring      m_strDelims ;       // Current delimiters for search
    UINT          m_nCurTextChar ;    // Current position in search string

    bool          InsertWord( LPCTSTR pszWord, unsigned long dwIndex ) ; // put the new word into the FSM for given index

    // Hide several base class members that shouldn't be used
    // Using these members could cause the index numbers of strings to change, throwing
    //     off the embedded index entries in the FSM
    iterator insert(const_iterator _Where, const _tstring & _Val);
    void insert(const_iterator _Where, size_type _Count, const _tstring & _Val);
    template<class _Iter> void insert(const_iterator _Where, _Iter _First, _Iter _Last);
    template<class _Iter> void _Insert(const_iterator _Where, _Iter _First, _Iter _Last, std::_Int_iterator_tag);
    template<class _Iter> void _Insert(const_iterator _Where, _Iter _First, _Iter _Last, std::input_iterator_tag);
    template<class _Iter> void _Insert(const_iterator _Where, _Iter _First, _Iter _Last, std::forward_iterator_tag);
    void _Reverse(pointer _First, pointer _Last);
    iterator erase(const_iterator _Where);
    iterator erase(const_iterator _First_arg, const_iterator _Last_arg);
} ;

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.


This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


About the Author

Scot Brennecke
Software Developer (Senior) Microsoft
United States United States
Scot is a Sr. Escalation Engineer for the Microsoft Developer Support Languages team. He helps software developers who are Microsoft customers find bugs in their own, or Microsoft's, code.

Scot spends most of his time writing, reading, or thinking about C++ software, thereby classifying him as a geek.

You may also be interested in...

Permalink | Advertise | Privacy | Terms of Use | Mobile
Web03 | 2.8.171207.1 | Last Updated 25 Apr 2010
Article Copyright 2002 by Scot Brennecke
Everything else Copyright © CodeProject, 1999-2017
Layout: fixed | fluid