Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version

A Multiple Substring Search Class: CIVStringSet

, 25 Apr 2010 CPOL
A string array class using MFC or STL that performs very fast multiple string searches
civstringset.zip
Write Triangle
Test
QueryBuilder
civstringset_demo.zip
StringSetSample
res
StringSetSample.ico
CIVStringSet_MFC_Source.zip
civstringset_source.zip
IVCode
IVStringSet
MFC
STL
CIVStringSet_STL_Source.zip
// StringSet.H: interface for the CIVStringSet class.
//
// Written 12 June 2002 by Scot T Brennecke
// Thanks to Moishe Halibard and Moshe Rubin for their article,
//    "A Multiple Substring Search Algorithm" in the June 2002
//    edition of C/C++ Users Journal.  This class is based on
//    the algorthim therein described, but extended to return
//    all strings and use MFC classes.
//
// Modified 24 Apr 2010 by Scot T Brennecke

#pragma once

#pragma warning(disable: 4100 4786)
#include <set>
#include <vector>
#include <list>

struct SEntry
{
    SEntry( DWORD dwState = 0UL, DWORD dwIndex = 0UL )
        : m_dwState( dwState ), m_dwIndex( dwIndex ) {}

    DWORD m_dwState ;
    DWORD m_dwIndex ;
} ;

class CIVStringSet : public CStringArray
{
public:
    CIVStringSet( DWORD dwInitialWidth = 64 ) ;  // Initial width of FSM
    virtual ~CIVStringSet() ;

    bool     Add( LPCTSTR pszWord ) ;                     // a single word
    bool     Add( const CString & rstrWord ) ;            // a single word
    size_t   Add( LPCTSTR pszWords, LPCTSTR pszDelims ) ; // multiple words, delimited with chars from pszDelims
    size_t   Add( LPCTSTR pszzWords, size_t nWords ) ;    // nWords words, each 0 term'd, with extra 0 at end
    size_t   Add( const std::set<CString> & sstrWords ) ; // all the elements of a set of CStrings
    size_t   Add( const CStringArray & astrWords ) ;      // all the elements of a CStringArray
    size_t   Add( const CStringList & lstrWords ) ;       // all the elements of a CStringList

    void     SetDelims( LPCTSTR pszDelims ) { m_strDelims = pszDelims ; } // delimiters to use in word search
    UINT     FindFirstIn( CString strText, size_t & rnFirst ) ; // Begin iteration
    UINT     FindNext( size_t & rnNext ) ;                      // Continue interation

    typedef std::pair<size_t,UINT>              CWordPosPair ;     // first is index of word in array, second is position in text
    typedef std::list< std::pair<size_t,UINT> > CWordPosPairList ; // list of pairs to be returned by FindAllIn
    size_t   FindAllIn( CString strText, CWordPosPairList & rlstrWords ) ; // Iterate all at once and make list

protected:
    std::vector< std::vector< SEntry > > m_apnFSM ;   // Finite State Machine.
    DWORD    m_dwMaxUsedState ; // largest state value used
    CString  m_strSearch ;      // Current search string
    CString  m_strDelims ;      // Current delimiters for search
    UINT     m_nCurTextChar ;   // Current position in search string

    bool     InsertWord( LPCTSTR pszWord, DWORD wIndex ) ; // put the new word into the FSM for given index

private:
    // Hide several base class members that shouldn't be used
    // Using these members could cause the index numbers of strings to change, throwing
    //     off the embedded index entries in the FSM
    void SetAt( int nIndex, LPCTSTR newElement ) ;
    void SetAt( int nIndex, const CString & newElement ) ;
    void SetAtGrow( int nIndex, LPCTSTR newElement ) ;
    void SetAtGrow( int nIndex, const CString & newElement ) ;
    int Append( const CStringArray & src ) ;
    void Copy( const CStringArray & src ) ;
    void InsertAt( int nIndex, LPCTSTR newElement, int nCount = 1 ) ;
    void InsertAt( int nIndex, const CString & newElement, int nCount = 1 ) ;
    void RemoveAt( int nIndex, int nCount = 1 ) ;
    void InsertAt( int nStartIndex, CStringArray * pNewArray ) ;
} ;

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Scot Brennecke
Software Developer (Senior) Microsoft
United States United States
Scot is an Escalation Engineer for the Microsoft Developer Support Languages team. He helps software developers who are Microsoft customers find bugs in their own, or Microsoft's, code.
 
Scot spends most of his time writing, reading, or thinking about C++ software, thereby classifying him as a geek.

| Advertise | Privacy | Mobile
Web01 | 2.8.141022.1 | Last Updated 25 Apr 2010
Article Copyright 2002 by Scot Brennecke
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid