Works quite well. Nice job!
I have another suggestion or two. First, separate the specification of the delimiters from the call to Split. Setting up the delimiters has more than a trivial amount of overhead, and you might want to reuse the same delimiters across a bunch of calls.
I was also wondering whether the CString parameters should be passed by reference or by value, or whether it makes any difference: you pass the CStringArray by reference, but not the two CString parameters.
void CTokenEx::Split(CString Source, CString Deliminator, CStringArray& AddIt, BOOL bAddEmpty)
or
void CTokenEx::Split(CString& Source, CString& Deliminator, CStringArray& AddIt, BOOL bAddEmpty)
I tried making the change. Oddly, the Source parameter can be passed by reference, but the Delimiter parameter cannot. (But there is a LOT about MFC that I don't understand.)
I ran the following test cases against the revised code, set up so that only A-Z and a-z are allowed in tokens, and they all pass just fine. Sweet!
// One row of the table-driven tokenizer test.
//
// NOTE(review): the test table initializes both pointer members from string
// literals, so `const char*` would be the accurate type. It is left as `char*`
// here because code that copies these members into `char*` variables would
// stop compiling; change both sides together.
struct s_testTokenizer {
    // Number of tokens the input is expected to produce.
    int expectedTokens;

    // Input text handed to the tokenizer.
    char* pActualPattern;

    // Expected rendering of the result: the token count and a space, followed
    // by one "<length token>" group per token, e.g. "3 <1 a><1 b><1 c>".
    char* pExpectedPattern;
};
struct s_testTokenizer testTokenizer[] = {
{ 1, "a", "1 <1 a>"},
{ 3, "a b c", "3 <1 a><1 b><1 c>"},
{ 3, " a b c ", "3 <1 a><1 b><1 c>"},
{ 3, "one two three", "3 <3 one><3 two><5 three>"},
{ 3, "one\ttwo\tthree", "3 <3 one><3 two><5 three>"},
{ 3, "one,two,,,, ,,, three,,,", "3 <3 one><3 two><5 three>"},
{ 3, " one two three", "3 <3 one><3 two><5 three>"},
{ 3, " one\ttwo\tthree", "3 <3 one><3 two><5 three>"},
{ 3, " one\ntwo\nthree", "3 <3 one><3 two><5 three>"},
{ 3, " one,two,,,, ,,, three,,,", "3 <3 one><3 two><5 three>"},
{ 3, " one two three ", "3 <3 one><3 two><5 three>"},
{ 3, " one\ttwo\tthree ", "3 <3 one><3 two><5 three>"},
{ 3, " one,two,,,, ,,, three,,, ", "3 <3 one><3 two><5 three>"},
{ 3, "one two three ", "3 <3 one><3 two><5 three>"},
{ 3, "one\ttwo\tthree", "3 <3 one><3 two><5 three>"},
{ 3, " one\ttwo\tthree ", "3 <3 one><3 two><5 three>"},
{ 3, " one\ttwo\tthree", "3 <3 one><3 two><5 three>"},
{ 3, "one\ttwo\tthree ", "3 <3 one><3 two><5 three>"},
{ 3, "\tone\ttwo\tthree ", "3 <3 one><3 two><5 three>"},
{ 3, "\tone\ttwo\tthree\t", "3 <3 one><3 two><5 three>"},
{ 3, "one,two,,,, ,,, three,,, ", "3 <3 one><3 two><5 three>"},
{ 3, " one \t two \t three ", "3 <3 one><3 two><5 three>"},
{ 0, "", "0 "},
{ 0, "1", "0 "},
{ 0, " \t ", "0 "},
{ 0, "123", "0 "},
{ 4, "a1b2c3d", "4 <1 a><1 b><1 c><1 d>"},
{ 4, " a1b2c3d ", "4 <1 a><1 b><1 c><1 d>"},
{ 4, " a1bb2c3d ", "4 <1 a><2 bb><1 c><1 d>"},
{ 4, "a1bb2c3d", "4 <1 a><2 bb><1 c><1 d>"},
{ 4, "a1bb2c3d ", "4 <1 a><2 bb><1 c><1 d>"},
{ 4, " a1bb2c3d", "4 <1 a><2 bb><1 c><1 d>"},
{ 1, " 12abc345 ", "1 <3 abc>"},
{ 1, " 12abc345", "1 <3 abc>"},
{ 1, "12abc345 ", "1 <3 abc>"},
{ 1, "12abc345", "1 <3 abc>"},
{ 2, "12abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg678", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " 12 abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, "12 abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, "12 abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, " 12 abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg ", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg ", "2 <3 abc><4 defg>"},
{ 2, "12abc345defg ", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg ", "2 <3 abc><4 defg>"},
{ 2, " 12abc345defg ", "2 <3 abc><4 defg>"},
{ 2, " 12 abc 345 defg ", "2 <3 abc><4 defg>"},
{ 2, "12 abc 345 defg ", "2 <3 abc><4 defg>"},
{ 2, "12 abc 345 defg ", "2 <3 abc><4 defg>"},
{ 2, " 12 abc 345 defg ", "2 <3 abc><4 defg>"},
{ 2, "abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " abc345defg678", "2 <3 abc><4 defg>"},
{ 2, " abc345defg678", "2 <3 abc><4 defg>"},
{ 2, "abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, "abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, "abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " abc345defg678 ", "2 <3 abc><4 defg>"},
{ 2, " abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, " abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, " abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, " abc 345 defg 678 ", "2 <3 abc><4 defg>"},
{ 2, " 00 11 2 3 4 5 6 77 88 99 aa bb ","2 <2 aa><2 bb>"},
{ 9, " aa bb c d e f g hh ii ", "9 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii>"},
{10, " aa bb c d e f g hh ii jj ", "10 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj>"},
{11, " aa bb c d e f g hh ii jj kk ", "11 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk>"},
{12, " aa bb c d e f g hh ii jj kk ll ","12 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk><2 ll>"},
{ 9, "aa bb c d e f g hh ii ", "9 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii>"},
{10, "aa bb c d e f g hh ii jj ", "10 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj>"},
{11, "aa bb c d e f g hh ii jj kk ", "11 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk>"},
{12, "aa bb c d e f g hh ii jj kk ll ", "12 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk><2 ll>"},
{ 9, " aa bb c d e f g hh ii", "9 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii>"},
{10, " aa bb c d e f g hh ii jj", "10 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj>"},
{11, " aa bb c d e f g hh ii jj kk", "11 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk>"},
{12, " aa bb c d e f g hh ii jj kk ll", "12 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk><2 ll>"},
{ 9, "aa bb c d e f g hh ii", "9 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii>"},
{10, "aa bb c d e f g hh ii jj", "10 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj>"},
{11, "aa bb c d e f g hh ii jj kk", "11 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk>"},
{12, "aa bb c d e f g hh ii jj kk ll", "12 <2 aa><2 bb><1 c><1 d><1 e><1 f><1 g><2 hh><2 ii><2 jj><2 kk><2 ll>"},
};
// Table-driven test driver: runs every row of testTokenizer through
// CTokenEx::Split and compares a textual rendering of the returned tokens
// ("<count> " followed by one "<length token>" group per token) with the row's
// expected pattern. The row's expectedTokens value is checked as well.
//
// NOTE(review): errorEncountered is not declared in this snippet; it is
// assumed to be an integer counter that the enclosing scope declares and
// initializes to 0 -- confirm.
// NOTE(review): the narrow printf/strcmp calls on LPCTSTR data assume an
// ANSI/MBCS (non-Unicode) build -- confirm.
int testCount = static_cast<int>(sizeof(testTokenizer) / sizeof(testTokenizer[0]));
printf("TestCount: %d\n", testCount);

// Delimiter set handed to Split for every row: space, punctuation, digits and
// newline/tab/carriage-return.
CString m_deliminator = " ,.0123456789;:-_+=\n\t\r";

for (int test = 0; test < testCount; ++test) {
    // Pointers to const: the table rows are initialized from string literals
    // and are only ever read here.
    const char* pActualPattern = testTokenizer[test].pActualPattern;
    const char* pExpectedPattern = testTokenizer[test].pExpectedPattern;
    const int expectedTokens = testTokenizer[test].expectedTokens;

    // A fresh tokenizer and result array per row keeps the rows independent.
    CTokenEx tok;
    CString csSplit(pActualPattern);
    CStringArray splitIt;
    tok.Split(csSplit, m_deliminator, splitIt, FALSE);

    // GetSize() may be wider than int on 64-bit builds; narrow explicitly
    // because the value is printed with %d.
    const int size = static_cast<int>(splitIt.GetSize());

    // Build the rendering in a CString instead of fixed-size char buffers, so
    // an unexpectedly long token or an unexpectedly large number of tokens
    // (exactly what a tokenizer bug would produce) cannot overrun a buffer.
    CString actualTokenStrs;
    actualTokenStrs.Format("%d ", size);
    for (int iNum = 0; iNum < size; ++iNum) {
        CString innerTokenStr;
        // The explicit LPCTSTR conversion is required: a CString object must
        // not be passed through a variadic argument list directly.
        innerTokenStr.Format("<%d %s>", splitIt[iNum].GetLength(),
                             static_cast<LPCTSTR>(splitIt[iNum]));
        actualTokenStrs += innerTokenStr;
    }

    const bool patternMatches =
        strcmp(pExpectedPattern, static_cast<LPCTSTR>(actualTokenStrs)) == 0;
    const bool countMatches = (size == expectedTokens);

    if (!patternMatches || !countMatches) {
        printf("\nTokenizer problem: \nInput: [%s]\nExpect: [%s]\nActual: [%s]\n\n",
               pActualPattern, pExpectedPattern, static_cast<LPCTSTR>(actualTokenStrs));
        if (!countMatches) {
            // Reported separately so an inconsistent table row (count column
            // disagreeing with its own pattern) is easy to tell apart.
            printf("Token count: expected %d, actual %d\n\n", expectedTokens, size);
        }
        errorEncountered++;
    }
    else {
        printf("OK: [%s] --> [%s]\n", pActualPattern, static_cast<LPCTSTR>(actualTokenStrs));
    }
}

if (errorEncountered == 0) {
    printf("\nSuccess if this prints out\n");
}
|