Click here to Skip to main content
Click here to Skip to main content

Finding a substring in a text

, 15 Aug 2011 CPOL
Rate this:
Please Sign up or sign in to vote.
How to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options.

Here's how to find a substring in a text, forward and backward, with Case Sensitive and Match Whole Word options (the main function is RichEditFind). If bMatchCase is false, both the text and the search string are converted to lower case so that the match is case-insensitive (if it is true, the text is not altered). If bWholeWord is true, a match is accepted only when the characters immediately before and after it are separators; the start and the end of the text also count as separators. A separator is any character that is not an ASCII letter or digit (a-z, A-Z, 0-9). If bWholeWord is false, the previous and next characters are ignored (they may be alphanumeric). nPosition specifies the position at which the search starts. When bReverse is false, CString's Find function is used to locate the substring; otherwise the custom ReverseFind function shown below is used, because CString's own ReverseFind can only search for a single character, not a substring.

// Returns non-zero when ch is an ASCII letter or digit (0-9, A-Z, a-z),
// i.e. a character that counts as part of a word.
static BOOL IsAsciiWordChar(TCHAR ch)
{
   return (ch >= _T('0') && ch <= _T('9')) ||
          (ch >= _T('A') && ch <= _T('Z')) ||
          (ch >= _T('a') && ch <= _T('z'));
}

// Whole-word test for a match of strFindText located at index nPosition
// of strRichEdit.
//
// Returns TRUE when neither neighbour of the match is an ASCII letter or
// digit; the beginning and the end of the text count as separators.
// Returns FALSE as soon as one neighbour is a word character.
BOOL IsSeparator(CString strRichEdit, CString strFindText, int nPosition)
{
   const int nTextLength = strRichEdit.GetLength();
   const int nAfterMatch = nPosition + strFindText.GetLength();

   // Character immediately before the match (none when the match starts
   // at index 0).
   if (nPosition > 0 && IsAsciiWordChar(strRichEdit.GetAt(nPosition - 1)))
      return FALSE;

   // Character immediately after the match (none when the match reaches
   // the end of the text).
   if (nAfterMatch < nTextLength && IsAsciiWordChar(strRichEdit.GetAt(nAfterMatch)))
      return FALSE;

   return TRUE; // both sides are separators or text boundaries
}
 
// Searches lpszData backwards for the substring lpszSub.
//
// startpos is the highest index at which a match may begin; -1 (or any
// value past the end of the text) means "start from the end of the text".
// Returns the zero-based index of the match closest to startpos when
// walking towards the beginning of the text, or -1 when there is none.
// A match at index 0 is reported. An empty text, an empty substring or a
// startpos below -1 yields -1.
int ReverseFind(LPCTSTR lpszData, LPCTSTR lpszSub, int startpos)
{
   const int lenSub = lstrlen(lpszSub);
   const int len = lstrlen(lpszData);

   // Nothing to find, or the substring cannot fit into the text.
   if (lenSub <= 0 || lenSub > len)
      return -1;

   // Only -1 is the "from the end" sentinel; anything lower would point
   // before the buffer.
   if (startpos < -1)
      return -1;

   // A match cannot begin after this index, so never compare past it.
   const int lastStart = len - lenSub;
   if (startpos == -1 || startpos > lastStart)
      startpos = lastStart;

   // Walk down to and including index 0 so that a match at the very
   // beginning of the text is found as well.
   for (int pos = startpos; pos >= 0; --pos)
   {
      if (_tcsncmp(lpszData + pos, lpszSub, lenSub) == 0)
         return pos;
   }
   return -1;
}

// Finds strFindText inside strRichEdit.
//
//   nPosition  - index at which the search starts; a negative value is
//                replaced by the index of the last character of the text.
//                NOTE(review): this replacement is applied to forward
//                searches too (kept from the original behaviour) - confirm
//                that callers expect it.
//   bReverse   - TRUE searches towards the beginning of the text using
//                ReverseFind, FALSE searches forward using CString::Find.
//   bMatchCase - FALSE lower-cases both strings (local copies) before
//                searching, making the comparison case-insensitive.
//   bWholeWord - TRUE accepts only matches for which IsSeparator reports
//                separators on both sides.
//
// Returns the zero-based index of the match, or -1 when nothing is found.
int RichEditFind(CString strRichEdit, CString strFindText,
   int nPosition, BOOL bReverse, BOOL bMatchCase, BOOL bWholeWord)
{
   if (nPosition < 0)
      nPosition = strRichEdit.GetLength() - 1;

   // Still negative means the text is empty; never hand a negative start
   // index to CString::Find.
   if (nPosition < 0)
      return -1;

   if (!bMatchCase)
   {
      // The parameters are copies, so the caller's strings stay untouched.
      strRichEdit.MakeLower();
      strFindText.MakeLower();
   }

   int nFound = bReverse
      ? ReverseFind(strRichEdit, strFindText, nPosition)
      : strRichEdit.Find(strFindText, nPosition);

   if (!bWholeWord)
      return nFound;

   // Skip over matches that are embedded in a longer word.
   while (nFound != -1)
   {
      if (IsSeparator(strRichEdit, strFindText, nFound))
         return nFound;

      if (bReverse)
      {
         // There is nothing before index 0. Passing -1 on would be read
         // by ReverseFind as "start from the end" and loop forever.
         if (nFound == 0)
            break;
         nFound = ReverseFind(strRichEdit, strFindText, nFound - 1);
      }
      else
      {
         nFound = strRichEdit.Find(strFindText, nFound + 1);
      }
   }
   return -1;
}

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Mihai MOGA
Architect Printec Group Romania SRL
Romania Romania
My professional background includes knowledge of analyst programmer for Microsoft Visual C++, Microsoft Visual C#, Microsoft Visual Basic, Sun Java, assembly for Intel 80x86 microprocessors, assembly for PIC microcontrollers (produced by Microchip Inc.), relational databases (MySQL, Oracle, SQL Server), concurrent version systems, bug tracking systems, web design (XHTML, CSS, XML, PHP/MySQL, JavaScript, Flash).
 
If you like my articles, please visit my website for more: http://www.mihaimoga.com/
Follow on   Twitter   Google+   LinkedIn

Comments and Discussions

 
GeneralReason for my vote of 2 Grossly inefficient code PinmemberMember 793689722-Aug-11 16:52 

General General    News News    Suggestion Suggestion    Question Question    Bug Bug    Answer Answer    Joke Joke    Rant Rant    Admin Admin   

Use Ctrl+Left/Right to switch messages, Ctrl+Up/Down to switch threads, Ctrl+Shift+Left/Right to switch pages.

| Advertise | Privacy | Terms of Use | Mobile
Web02 | 2.8.141223.1 | Last Updated 15 Aug 2011
Article Copyright 2011 by Mihai MOGA
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid