|
|
First of all I like this code, it is small and fully stand-alone.
I have modified it, because I need an additional wildcard joker that represents digits. Finally the modified function accepts '*', '?' and '#' as joker characters.
int wildcmp_ex(const char *wild, const char *string) {
const char *cp = NULL, *mp = NULL;
while (*string) {
if (*wild == '*') {
if (!*++wild) {
return 1;
}
mp = wild;
cp = string+1;
} else if (((*wild == *string) && (*wild != '#')) || (*wild == '?') || ((*wild == '#') && isdigit(*string))) {
wild++;
string++;
} else {
if (mp)
{
wild = mp;
string = cp++;
}
else
{
return 0;
}
}
}
while (*wild == '*') {
wild++;
}
return !*wild;
}
Thomas Haase
modified 29-Sep-11 8:26am.
|
|
|
|
|
Hi Jack Handy,
Is there a licence attached to this code?
Thanks, Mark
|
|
|
|
|
Just for fun... a C# version with almost the same syntax as the original C version
public static bool wildcmp(string pattern, string text) {
var wild = new StringScanner(pattern);
var @string = new StringScanner(text);
var mp = wild;
var cp = @string;
while (@string && wild != '*') {
if (wild != @string && wild != '?') {
return false;
}
wild++;
@string++;
}
while (@string) {
if (@wild == '*') {
if (!++wild) {
return true;
}
mp = wild;
cp = @string + 1;
} else if (wild == @string || wild == '?') {
wild++;
@string++;
} else {
wild = mp;
@string = cp++;
}
}
while (wild == '*') {
wild++;
}
return !wild;
}
public struct StringScanner
{
private string _string;
private int _position;
public StringScanner(string s)
{
_string = s;
_position = 0;
}
public string String
{
get { return _string; }
}
public int Position
{
get { return _position; }
}
public bool Finished
{
get { return _position == _string.Length;}
}
public char Current
{
get { return Finished ? '\0' : _string[_position]; }
}
public bool MoveNext()
{
if (Finished)
return false;
_position++;
return true;
}
public static StringScanner operator ++(StringScanner scanner)
{
scanner.MoveNext();
return scanner;
}
public static StringScanner operator +(StringScanner scanner, int n)
{
return new StringScanner(scanner.String)
{
_position = Math.Min(scanner.Position + n, scanner.String.Length)
};
}
public static implicit operator bool(StringScanner scanner)
{
return !scanner.Finished;
}
public static implicit operator char(StringScanner scanner)
{
return scanner.Current;
}
public static bool operator ==(StringScanner scanner1, StringScanner scanner2)
{
return scanner1.Current == scanner2.Current;
}
public static bool operator !=(StringScanner scanner1, StringScanner scanner2)
{
return scanner1.Current != scanner2.Current;
}
}
|
|
|
|
|
I've been using this for years, just don't show it to your instructor.
BOOL wm(const char *s, const char *t)
{
return *t-'*' ? *s ? (*t=='?') | (toupper(*s)==toupper(*t)) && wm(s+1,t+1) : !*t : wm(s,t+1) || *s && wm(s+1,t);
}
If you want case sensitive, remove the toupper() calls.
|
|
|
|
|
This strikes me as an obvious place to use recursion. So here goes...
public class MString
{
public static bool CompareWWc(string strA, string strB, bool ignoreCase)
{
if (ignoreCase)
return CompareWWc(strA.ToLower(), strB.ToLower());
else
return CompareWWc(strA, strB);
}
public static bool CompareWWc(string strA, string strB)
{
for (int i = 0; i < strA.Length; i++)
{
if (strA[i] == '*')
{
if (i == strA.Length - 1)
return true;
strA = strA.Substring(i + 1);
for (int j = i; j < strB.Length; j++)
if (CompareWWc(strA, strB.Substring(j)))
return true;
return false;
}
if (i >= strB.Length || (strA[i] != strB[i] && strA[i] != '?'))
return false;
}
return strA.Length == strB.Length;
}
}
And here's a little test sequence:
if (!MString.CompareWWc("", ""))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("something", "something"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "zomething"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "some"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "something else"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("s?m?th???", "something"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("s?m?th???", "somethin"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*", ""))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*nonsense", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("non*nse", "nonsenze"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*n?e", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*on*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*n*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("*non*nse", "nonsenze"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*n*n?e", "nonsense"))
Console.WriteLine("Something wrong!");
}
By the way, the name CompareWWc means Compare With Wildcards.
|
|
|
|
|
Actually, the recursive function together with substring will make this slow.
I'm using this at the moment:
public static class StringExtensions
{
public static bool WildcardMatch(this string str, string compare, bool ignoreCase)
{
if (ignoreCase)
return str.ToLower().WildcardMatch(compare.ToLower());
else
return str.WildcardMatch(compare);
}
public static bool WildcardMatch(this string str, string compare)
{
if (string.IsNullOrEmpty(compare))
return str.Length == 0;
int pS = 0;
int pW = 0;
int lS = str.Length;
int lW = compare.Length;
while (pS < lS && pW < lW && compare[pW] != '*')
{
char wild = compare[pW];
if (wild != '?' && wild != str[pS])
return false;
pW++;
pS++;
}
int pSm = 0;
int pWm = 0;
while (pS < lS && pW < lW)
{
char wild = compare[pW];
if (wild == '*')
{
pW++;
if (pW == lW)
return true;
pWm = pW;
pSm = pS + 1;
}
else if (wild == '?' || wild == str[pS])
{
pW++;
pS++;
}
else
{
pW = pWm;
pS = pSm;
pSm++;
}
}
while (pW < lW && compare[pW] == '*')
pW++;
return pW == lW && pS == lS;
}
}
|
|
|
|
|
Hi Erwin,
Thanks for your posting. It did make me decide to investigate the situation.
I still really think this is a situation that begs for recursion. But maybe you were right that substring is not a good idea. So I made this version:
public class MString2
{
public static bool CompareWWc(string strA, string strB, bool ignoreCase)
{
if (ignoreCase)
return CompareWWc(strA.ToLower(), 0, strB.ToLower(), 0);
else
return CompareWWc(strA, 0, strB, 0);
}
public static bool CompareWWc(string strA, string strB)
{
return CompareWWc(strA, 0, strB, 0);
}
private static bool CompareWWc(string strA, int indexA, string strB, int indexB)
{
for (int i = 0; indexA + i < strA.Length; i++)
{
if (strA[indexA + i] == '*')
{
if (indexA + i == strA.Length - 1)
return true;
for (int j = indexB + i; j < strB.Length; j++)
if (CompareWWc(strA, indexA + i + 1, strB, j))
return true;
return false;
}
if (indexB + i >= strB.Length || (strA[indexA + i] != strB[indexB + i] && strA[indexA + i] != '?'))
return false;
}
return strA.Length - indexA == strB.Length - indexB;
}
}
Then I ran some timing tests, using System.Diagnostics.Stopwatch. I put my test case with 19 calls to the function in a loop and executed it 10,000 times. I did this for my original version, your version, and my new version. I compiled the programs in Release mode.
Assuming I haven't made a mistake somewhere, here are my results for a single function call:
My original version: 342 nonoseconds
Your version: 237 nanoseconds
My second version: 279 nanoseconds
Now to tell you the truth, I find it very difficult to get excited about saving 100 nanoseconds at the expense of having two and a half times as many lines of code. Especially since my expected use of this function in my application will probably never exceed a couple hundred calls per day.
Anyway, thanks for getting me to think things over again and make the tests. Personally, at least in this particular case, I prefer programmer understandability to execution efficiency. I've decided to stick with my original version, since I think my second version is more difficult to understand, and the improved efficiency not worth that disadvantage.
|
|
|
|
|
Hi Erwin,
Sorry - my previous numbers are not correct. I was running the programs under the Visual Studio debugger, and that was apparently not good for timing tests.
Here's what I get now:
My original version: 243 nonoseconds
Your version: 76 nanoseconds
My second version: 111 nanoseconds
Assuming these timings are valid, your version is three times faster than my original version, and that is pretty significant, at least in a situation were the function may be used millions times a day.
Sorry for the incorrect timings in my previous posting.
|
|
|
|
|
Yes, the recursive function makes it more understandable for sure. In my case I actually call it several thousands of times after certain user actions, so I'm even considering using unsafe code I also thought of a special case where your function will get a performance hit: SearchString = "--ABC-----ABC-----ABC-----lots of text (without 'at') goes here" , wildcardString = "*ABC*@" . In this case my function (based on Jack's) will search for the '@' character once starting from position 5 (but won't find it, because it's not there). With your function it would search for the '@' character 3 times (once starting from position 5 until the end, once from 13 and once from 21). The longer the text at the end or the more occurances of 'ABC' at the start, the greater the performance hit.
|
|
|
|
|
If at first you don't succeed...
Here's my third version, where I say to hell with minimizing lines of code and try to optimize the speed. No "unsafe" code though, unless you consider "goto" to be unsafe coding.
public class MString
{
public static bool CompareWWc(string strA, string strB, bool ignoreCase)
{
if (ignoreCase)
return CompareWWc(strA.ToLower(), strB.ToLower());
else
return CompareWWc(strA, strB);
}
public static bool CompareWWc(string strA, string strB)
{
int starPtr = 0;
if (strB.Length >= strA.Length)
{
for (;; starPtr++)
{
if (starPtr == strA.Length)
return strA.Length == strB.Length;
if (strA[starPtr] == '*')
goto firstSegmentMatches;
if (strA[starPtr] != strB[starPtr] && strA[starPtr] != '?')
return false;
}
}
else
{
for (;; starPtr++)
{
if (strA[starPtr] == '*')
goto firstSegmentMatches;
if (starPtr == strB.Length)
return false;
if (strA[starPtr] != strB[starPtr] && strA[starPtr] != '?')
return false;
}
}
firstSegmentMatches:
int indexA;
int indexB = starPtr;
while (true)
{
indexA = ++starPtr;
if (indexA == strA.Length)
return true;
for (;; starPtr++)
if (starPtr == strA.Length || strA[starPtr] == '*')
break;
for (;; indexB++)
{
if (starPtr - indexA > strB.Length - indexB)
return false;
for (int i = indexA, j = indexB; i < starPtr; i++, j++)
if (strA[i] != strB[j] && strA[i] != '?')
goto tryStringBAgain;
goto findNextSegment;
tryStringBAgain:
continue;
}
findNextSegment:
indexB += starPtr - indexA;
if (starPtr == strA.Length)
return indexB == strB.Length;
}
}
}
And here are my timing results (which I'm not totally sure of, I'm not used to timing code):
My original version: 243 nanoseconds 17 lines of code
Erwin's version: 76 nanoseconds 42 lines of code
My second version: 111 nanoseconds 16 lines of code
My third version: 56 nanoseconds 52 lines of code
I'd appreciate it if someone would check this out and let me know if they find any bugs or anything.
|
|
|
|
|
I found small bug, if compare "*a" and "babbba" function return false.
|
|
|
|
|
Dear Jack,
Dear all,
I used this function in comparing two strings the first is Pattern(* KK *) and the second is Text(TT KK ZZ) and the function return pass. thats briliant,but my question how I can edit the function to be able to catch or handle the characters of matched * to save them in variables. for example:
X = TT
Y = ZZ
to deal with them later on in my system.
I tried many times but its not working well so far.
So please any one have an idea to do that please let me know and its will be appreciated.
Best Regards.
|
|
|
|
|
|
|
Great code, but when trying this I realized that the following pattern is a match:
Search: ????????
Text to search: ABC
The problem is that the pattern can be LONGER than the text searched, in which case it should return a not found, but instead returns found.
Also, this example succeeds:
Search: y*n
Text to search: yessir
But of course should fail, since I'm looking for a text that ends with n
So I re-wrote your program to this, to correctly handle this situation.
bool StrWildCmp(char* wildstring, char *matchstring){
char stopstring[1];
*stopstring = 0;
while(*matchstring) {
if (*wildstring == '*') {
if (!*++wildstring) {
return true;
} else {
*stopstring = *wildstring;
}
}
if(*stopstring) {
if(*stopstring == *matchstring ) {
wildstring++;
matchstring++;
*stopstring = 0;
} else {
matchstring++;
}
} else if((*wildstring == *matchstring) || (*wildstring == '?')) {
wildstring++;
matchstring++;
} else {
return false;
}
if(!*matchstring && *wildstring && *wildstring != '*') {
return false;
}
}
return true;
}
Thanks again for the inspiration.
|
|
|
|
|
some cases don't work properly:
wildstring = "a*bc"
matchstring = "abbc"
should be true, but it returns false
wildstring = "a*b"
matchstring = "a"
should be false, but it returns true
wildstring = "a*?b"
matchstring = "axb"
should be true, but it returns false
wildstring = "a**b"
matchstring = "axb"
should be true, but it returns false (ok, the two ** aren't useful, but they should work)
I solved the last 3 bugs, but the first one is a bit tricky...
bool StrWildCmp(char* wildstring, char *matchstring){
char stopstring[1];
*stopstring = '\0';
while(*matchstring != '\0')
{
if (*wildstring == '*')
{
do
{
wildstring++;
} while (*wildstring == '*');
if (*wildstring == '\0')
{
return TRUE;
}
else
{
*stopstring = *wildstring;
}
}
if(*stopstring != '\0')
{
if((*stopstring == *matchstring) || (*stopstring == '?') )
{
wildstring++;
*stopstring = '\0';
}
matchstring++;
}
else
if((*wildstring == *matchstring) || (*wildstring == '?'))
{
wildstring++;
matchstring++;
}
else
{
return FALSE;
}
if( (*matchstring == '\0') && (*wildstring != '\0') )
{
while (*wildstring == '*')
wildstring++;
if (*wildstring == '\0')
return TRUE;
else
return FALSE;
}
}
|
|
|
|
|
if you use "Circle_hole_pX3.BMP" as matchstring and "*_PX?.*" as wildstring the return value is FALSE.
Here is the updated code:
BOOL StrWildCmp(char* wildstring, char *matchstring)
{
char stopstring[1];
*stopstring = '\0';
char *wildstringNew=wildstring;
while(*matchstring != '\0')
{
if (*wildstring == '*')
{
do
{
wildstring++;
} while (*wildstring == '*');
if (*wildstring == '\0') {
return TRUE;
}
else
{
*stopstring = *wildstring; }
}
if(*stopstring != '\0')
{
if((*stopstring == *matchstring) || (*stopstring == '?') )
{
wildstring++;
*stopstring = '\0';
}
matchstring++;
}
else
if((*wildstring == *matchstring) || (*wildstring == '?'))
{
wildstring++;
matchstring++;
}
else
{
if(*wildstring != '\0')
wildstring=wildstringNew;
else
return FALSE;
}
if( (*matchstring == '\0') && (*wildstring != '\0') )
{
while (*wildstring == '*') wildstring++;
if (*wildstring == '\0') return TRUE;
else
return FALSE;
}
}
}
|
|
|
|
|
|
I recommend against PathMatchSpec(). I used that function in my own code and it just bit me. Its wildcard behavior is broken for all but the simplest cases. For example, these two commands incorrectly return false:
::PathMatchSpec("C:\\Windows", "C:\\Windows.*");
::PathMatchSpec("C:\\Windows", "C:\\Windows.");
|
|
|
|
|
Anyone tried converting this to using wchar_t* (essentially Unicode) instead of char*?
|
|
|
|
|
This is great and got my 5 because is simple, fast and useful!
Here is the wchar_t version:
int wildcmp(const wchar_t *wild, const wchar_t *string)
{
const wchar_t *cp = NULL, *mp = NULL;
while ((*string) && (*wild != L'*')) {
if ((towlower(*wild) != towlower(*string)) && (*wild != L'?')) {
return 0;
}
wild++;
string++;
}
while (*string) {
if (*wild == L'*') {
if (!*++wild) {
return 1;
}
mp = wild;
cp = string+1;
} else if ((towlower(*wild) == towlower(*string)) || (*wild == L'?')) {
wild++;
string++;
} else {
wild = mp;
string = cp++;
}
}
while (*wild == L'*') {
wild++;
}
return !*wild;
}
Example:
if (wildcmp(L"*bl?h.*", L"asblah.plm")) {
MessageBox(0,"we have a match!","wildcmp wide",MB_TOPMOST);
} else {
MessageBox(0,"no match!","wildcmp wide",MB_TOPMOST);
}
|
|
|
|
|
This is the version of the wildcmp function in XBLite programming language:
FUNCTION SBYTE wildcmp( wildcard$, search$)
' wildcmp(const char *wild, const char *string)
' Written by Jack Handy - jakkhandy@hotmail.com
ULONG cp
ULONG mp
STRING s_txt$
ULONG sp
STRING w_txt$
ULONG wp
IFZ search$ THEN RETURN $$FALSE
IFZ wildcard$ THEN RETURN $$FALSE
w_txt$ = wildcard$ + "\0\0" ' Just to be sure
s_txt$ = search$ + "\0\0"
DO WHILE (s_txt${sp}) && (w_txt${wp} != '*')
IF (w_txt${wp} != s_txt${sp} ) && (w_txt${wp} != '?') THEN RETURN $$FALSE
INC wp
INC sp
LOOP
DO WHILE (s_txt${sp})
IF ( w_txt${wp} == '*' ) THEN
INC wp
IF !(w_txt${wp}) THEN RETURN $$TRUE
mp = wp
cp = sp + 1
ELSE
IF (w_txt${wp} == s_txt${sp} ) || (w_txt${wp} == '?') THEN
INC wp
INC sp
ELSE
wp = mp
sp = cp
IF s_txt${sp} THEN INC cp
ENDIF
ENDIF
LOOP
DO WHILE (w_txt${wp} == '*' )
INC wp
LOOP
RETURN !w_txt${wp}
END FUNCTION
|
|
|
|
|
I had converted the wildcmp to C#, it's very easy to wildcard string, thanks so much.
bool WildCompare(string strWild, string strEmail)
{
int cp = 0;
int mp = 0;
int wildIndex = 0;
int emailIndex = 0;
while ((!ValueIsNullOrEmpty(strEmail, emailIndex)) && (ValueAt(strWild, wildIndex) != '*'))
{
if ((ValueAt(strWild, wildIndex) != ValueAt(strEmail, emailIndex)) && (ValueAt(strWild, wildIndex) != '?'))
{
return false;
}
wildIndex++;
emailIndex++;
}
while (!ValueIsNullOrEmpty(strEmail,emailIndex))
{
if (ValueAt( strWild, wildIndex) == '*')
{
wildIndex++;
if (ValueIsNullOrEmpty(strWild,wildIndex ))
{
return true;
}
mp = wildIndex;
cp = emailIndex + 1;
}
else if ((ValueAt(strWild, wildIndex).Equals(ValueAt(strEmail, emailIndex)) || (ValueAt(strWild, wildIndex) == '?')))
{
wildIndex++;
emailIndex++;
}
else
{
wildIndex = mp;
emailIndex = cp++;
}
}
while (ValueAt(strWild, wildIndex) == '*')
{
wildIndex++;
}
return ValueIsNullOrEmpty(strWild, wildIndex);
}
|
|
|
|
|
is it good converted?
Take SharePoint to new height
|
|
|
|
|