|
|
Comments and Discussions
|
|
 |

|
I read this article; it's really amazing. I appreciate your efforts. I am a student and I need help defining the same kind of function according to my requirements. I hope I'll get a good response.
Words are strings which are separated by dots. Two additional characters are also valid, i.e. the *, which matches exactly 1 word, and the #, which matches 0..N words. Example: *.stock.# matches the routing keys usd.stock and eur.stock.dsf but not stock.nasdaq.
Your help would be highly appreciated.
Sam
|
|
|
|
|

|
First of all I like this code, it is small and fully stand-alone.
I have modified it, because I need an additional wildcard joker that represents digits. Finally the modified function accepts '*', '?' and '#' as joker characters.
/*
 * Match `string` against the wildcard pattern `wild`.
 *
 * Pattern syntax:
 *   '*'  matches any run of characters, including an empty run
 *   '?'  matches exactly one character of any kind
 *   '#'  matches exactly one decimal digit (as reported by isdigit)
 * Any other pattern character must be equal to the text character.
 * A '#' in the pattern never matches a literal '#' in the text.
 *
 * Returns 1 when the whole of `string` matches the whole of `wild`,
 * 0 otherwise. Neither argument may be NULL.
 */
int wildcmp_ex(const char *wild, const char *string) {
    const char *cp = NULL;  /* text position to resume from after a failed attempt */
    const char *mp = NULL;  /* pattern position right after the most recent '*' */

    while (*string) {
        if (*wild == '*') {
            if (!*++wild) {
                return 1;   /* a trailing '*' swallows the rest of the text */
            }
            /* Remember where to restart if the tail after '*' fails to match. */
            mp = wild;
            cp = string + 1;
        } else if (*wild == '?'
                   /* isdigit() is only defined for unsigned char values and EOF,
                    * so the (possibly negative) char must be converted first. */
                   || (*wild == '#' ? isdigit((unsigned char)*string) != 0
                                    : *wild == *string)) {
            wild++;
            string++;
        } else if (mp) {
            /* Mismatch after a '*': let the '*' absorb one more character. */
            wild = mp;
            string = cp++;
        } else {
            /* Mismatch before any '*' was seen: nothing to backtrack to. */
            return 0;
        }
    }

    /* The text is exhausted; only trailing '*' may remain in the pattern. */
    while (*wild == '*') {
        wild++;
    }
    return !*wild;
}
Thomas Haase
modified 29 Sep '11 - 8:26.
|
|
|
|

|
Hi Jack Handy,
Is there a licence attached to this code?
Thanks, Mark
|
|
|
|

|
Just for fun... a C# version with almost the same syntax as the original C version
/// <summary>
/// Matches <paramref name="text"/> against the wildcard pattern
/// <paramref name="pattern"/>, where '*' matches any run of characters
/// (including none) and '?' matches exactly one character.
/// </summary>
/// <param name="pattern">The wildcard pattern.</param>
/// <param name="text">The text to test.</param>
/// <returns>true when the whole text matches the whole pattern.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="pattern"/> or <paramref name="text"/> is null.
/// </exception>
public static bool wildcmp(string pattern, string text) {
    if (pattern == null) {
        throw new System.ArgumentNullException("pattern");
    }
    if (text == null) {
        throw new System.ArgumentNullException("text");
    }

    var wild = new StringScanner(pattern);
    var input = new StringScanner(text);
    var mp = wild;   // pattern position right after the most recent '*'
    var cp = input;  // text position to resume from after a failed attempt

    // Phase 1: everything before the first '*' must match character by character.
    while (!input.Finished && wild.Current != '*') {
        // StringScanner.Current yields '\0' at the end, which would compare equal
        // to a real NUL character in the text; test Finished explicitly so an
        // exhausted pattern can never consume further text.
        if (wild.Finished || (wild.Current != input.Current && wild.Current != '?')) {
            return false;
        }
        wild.MoveNext();
        input.MoveNext();
    }

    // Phase 2: match the remainder, backtracking to the last '*' on a mismatch.
    while (!input.Finished) {
        if (wild.Current == '*') {
            wild.MoveNext();
            if (wild.Finished) {
                return true;  // a trailing '*' swallows the rest of the text
            }
            mp = wild;
            cp = input + 1;
        } else if (!wild.Finished && (wild.Current == input.Current || wild.Current == '?')) {
            wild.MoveNext();
            input.MoveNext();
        } else {
            // Let the last '*' absorb one more character and try again.
            wild = mp;
            input = cp;
            cp.MoveNext();
        }
    }

    // Phase 3: the text is exhausted; only trailing '*' may remain in the pattern.
    while (wild.Current == '*') {
        wild.MoveNext();
    }
    return wild.Finished;
}
/// <summary>
/// A small cursor over a string that mimics a C <c>const char*</c>:
/// it can be advanced with <c>++</c> / <c>+ n</c>, converts to <c>char</c>
/// (the current character, or '\0' at the end) and to <c>bool</c>
/// (true while characters remain).
/// </summary>
/// <remarks>
/// Equality (<c>==</c>, <c>!=</c>, <see cref="Equals(object)"/>) compares only the
/// current characters of the two scanners, not their strings or positions.
/// A default-constructed scanner, or one built from null, behaves like a
/// scanner over an empty string.
/// </remarks>
public struct StringScanner
{
    private string _string;
    private int _position;

    /// <summary>Creates a scanner positioned at the first character of <paramref name="s"/>.</summary>
    public StringScanner(string s)
    {
        _string = s;
        _position = 0;
    }

    /// <summary>The string being scanned, exactly as passed to the constructor.</summary>
    public string String
    {
        get { return _string; }
    }

    /// <summary>Zero-based index of the current character; equals the length when finished.</summary>
    public int Position
    {
        get { return _position; }
    }

    /// <summary>True when no characters remain (also for a null or default scanner).</summary>
    public bool Finished
    {
        get { return _string == null || _position >= _string.Length; }
    }

    /// <summary>The character at the current position, or '\0' when finished.</summary>
    public char Current
    {
        get { return Finished ? '\0' : _string[_position]; }
    }

    /// <summary>Advances by one character; returns false (without moving) when already finished.</summary>
    public bool MoveNext()
    {
        if (Finished)
        {
            return false;
        }
        _position++;
        return true;
    }

    /// <summary>Advances the scanner by one character, stopping at the end.</summary>
    public static StringScanner operator ++(StringScanner scanner)
    {
        // The parameter is a copy of the operand; the compiler assigns the result back.
        scanner.MoveNext();
        return scanner;
    }

    /// <summary>
    /// Returns a scanner over the same string moved by <paramref name="n"/> characters,
    /// clamped to the range [0, length].
    /// </summary>
    public static StringScanner operator +(StringScanner scanner, int n)
    {
        int length = scanner._string == null ? 0 : scanner._string.Length;
        return new StringScanner(scanner._string)
        {
            _position = Math.Max(0, Math.Min(scanner._position + n, length))
        };
    }

    /// <summary>True while the scanner still has characters to read.</summary>
    public static implicit operator bool(StringScanner scanner)
    {
        return !scanner.Finished;
    }

    /// <summary>The current character (see <see cref="Current"/>).</summary>
    public static implicit operator char(StringScanner scanner)
    {
        return scanner.Current;
    }

    /// <summary>True when both scanners currently point at equal characters.</summary>
    public static bool operator ==(StringScanner scanner1, StringScanner scanner2)
    {
        return scanner1.Current == scanner2.Current;
    }

    /// <summary>True when the scanners currently point at different characters.</summary>
    public static bool operator !=(StringScanner scanner1, StringScanner scanner2)
    {
        return scanner1.Current != scanner2.Current;
    }

    /// <summary>Same comparison as <c>==</c>, so the operators and Equals agree.</summary>
    public override bool Equals(object obj)
    {
        return obj is StringScanner && this == (StringScanner)obj;
    }

    /// <summary>Hash of the current character, consistent with <see cref="Equals(object)"/>.</summary>
    public override int GetHashCode()
    {
        return Current.GetHashCode();
    }
}
|
|
|
|

|
I've been using this for years, just don't show it to your instructor.
/*
 * Case-insensitive wildcard match of text `s` against pattern `t`.
 * In the pattern, '*' matches any run of characters (including none)
 * and '?' matches exactly one character.
 *
 * Returns non-zero when the whole of `s` matches the whole of `t`.
 * Implemented by plain recursion, so patterns with many '*' can
 * backtrack heavily on long inputs. Neither argument may be NULL.
 */
BOOL wm(const char *s, const char *t)
{
    if (*t == '*') {
        /* Either the '*' matches nothing (drop it from the pattern), or it
         * absorbs one character of the text and stays in the pattern. */
        return wm(s, t + 1) || (*s && wm(s + 1, t));
    }
    if (!*s) {
        /* Text exhausted: match only if the pattern is exhausted too. */
        return !*t;
    }
    /* toupper() is only defined for unsigned char values and EOF, so the
     * (possibly negative) chars are converted before the call. */
    return ((*t == '?')
            | (toupper((unsigned char)*s) == toupper((unsigned char)*t)))
           && wm(s + 1, t + 1);
}
If you want case sensitive, remove the toupper() calls.
|
|
|
|

|
This strikes me as an obvious place to use recursion. So here goes...
/// <summary>
/// String helpers. CompareWWc ("Compare With Wildcards") matches a text
/// against a pattern in which '*' stands for any run of characters
/// (including none) and '?' stands for exactly one character.
/// </summary>
public class MString
{
    /// <summary>
    /// Matches <paramref name="strB"/> against the wildcard pattern <paramref name="strA"/>,
    /// optionally ignoring case.
    /// </summary>
    /// <param name="strA">The wildcard pattern.</param>
    /// <param name="strB">The text to test.</param>
    /// <param name="ignoreCase">When true, both strings are lower-cased (invariant culture) first.</param>
    /// <returns>true when the whole text matches the whole pattern.</returns>
    /// <exception cref="System.ArgumentNullException">An argument is null.</exception>
    public static bool CompareWWc(string strA, string strB, bool ignoreCase)
    {
        if (strA == null)
        {
            throw new System.ArgumentNullException("strA");
        }
        if (strB == null)
        {
            throw new System.ArgumentNullException("strB");
        }

        if (ignoreCase)
        {
            // Invariant casing keeps the result independent of the current culture.
            return CompareWWc(strA.ToLowerInvariant(), strB.ToLowerInvariant());
        }
        return CompareWWc(strA, strB);
    }

    /// <summary>
    /// Matches <paramref name="strB"/> against the wildcard pattern <paramref name="strA"/>
    /// (case-sensitive).
    /// </summary>
    /// <param name="strA">The wildcard pattern.</param>
    /// <param name="strB">The text to test.</param>
    /// <returns>true when the whole text matches the whole pattern.</returns>
    /// <exception cref="System.ArgumentNullException">An argument is null.</exception>
    public static bool CompareWWc(string strA, string strB)
    {
        if (strA == null)
        {
            throw new System.ArgumentNullException("strA");
        }
        if (strB == null)
        {
            throw new System.ArgumentNullException("strB");
        }
        return MatchFrom(strA, 0, strB, 0);
    }

    // Matches pattern[p..] against text[t..]; works on indices so no substrings are allocated.
    private static bool MatchFrom(string pattern, int p, string text, int t)
    {
        while (p < pattern.Length)
        {
            char c = pattern[p];
            if (c == '*')
            {
                // Consecutive '*' are equivalent to one. Skipping the whole run also
                // covers the case where the rest of the pattern must match the empty
                // string (e.g. "a**" against "a").
                while (p < pattern.Length && pattern[p] == '*')
                {
                    p++;
                }
                if (p == pattern.Length)
                {
                    return true; // trailing '*' swallows the rest of the text
                }

                // The remainder starts with a non-'*' character, so it needs at least
                // one text character: try every remaining start position.
                for (int start = t; start < text.Length; start++)
                {
                    if (MatchFrom(pattern, p, text, start))
                    {
                        return true;
                    }
                }
                return false;
            }

            if (t >= text.Length || (c != '?' && c != text[t]))
            {
                return false;
            }
            p++;
            t++;
        }

        // Pattern exhausted: it is a match only if the text is exhausted too.
        return t == text.Length;
    }
}
And here's a little test sequence:
if (!MString.CompareWWc("", ""))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("something", "something"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "zomething"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "some"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("something", "something else"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("s?m?th???", "something"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("s?m?th???", "somethin"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*", ""))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("*nonsense", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("non*nse", "nonsenze"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("non*n?e", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*on*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*n*nse", "nonsense"))
Console.WriteLine("Something wrong!");
if (MString.CompareWWc("*non*nse", "nonsenze"))
Console.WriteLine("Something wrong!");
if (!MString.CompareWWc("n*n*n?e", "nonsense"))
Console.WriteLine("Something wrong!");
}
By the way, the name CompareWWc means Compare With Wildcards.
|
|
|
|

|
Dear Jack,
Dear all,
I used this function to compare two strings: the first is the pattern (* KK *) and the second is the text (TT KK ZZ), and the function reports a match. That's brilliant, but my question is how I can edit the function so that it captures the characters matched by each * and saves them in variables. For example:
X = TT
Y = ZZ
to deal with them later on in my system.
I tried many times but its not working well so far.
So please any one have an idea to do that please let me know and its will be appreciated.
Best Regards.
|
|
|
|
|

|
Great code, but when trying this I realized that the following pattern is a match:
Search: ????????
Text to search: ABC
The problem is that the pattern can be LONGER than the text searched, in which case it should return a not found, but instead returns found.
Also, this example succeeds:
Search: y*n
Text to search: yessir
But of course should fail, since I'm looking for a text that ends with n
So I re-wrote your program to this, to correctly handle this situation.
// Matches `matchstring` against the wildcard pattern `wildstring`.
//
//   '*'  matches any run of characters, including an empty run
//   '?'  matches exactly one character
//   any other character must be equal to the text character
//
// Returns true only when the whole text matches the whole pattern. On a
// mismatch after a '*', the matcher goes back and lets that '*' absorb one
// more character, so patterns such as "*ab" against "aab" are found.
// Neither argument may be null; neither string is modified.
bool StrWildCmp(char* wildstring, char *matchstring){
    const char* wild = wildstring;
    const char* text = matchstring;
    const char* resumeWild = 0;  // pattern position right after the last '*'
    const char* resumeText = 0;  // text position where that '*' currently ends

    while (*text) {
        if (*wild == '*') {
            // A run of '*' is equivalent to a single one.
            while (*wild == '*') {
                ++wild;
            }
            if (!*wild) {
                return true;  // trailing '*' swallows the rest of the text
            }
            resumeWild = wild;
            resumeText = text;  // start by letting the '*' match nothing
        } else if (*wild == *text || *wild == '?') {
            ++wild;
            ++text;
        } else if (resumeWild) {
            // Mismatch: let the last '*' absorb one more character and retry.
            wild = resumeWild;
            text = ++resumeText;
        } else {
            return false;  // mismatch with no '*' to fall back on
        }
    }

    // Text exhausted: the pattern may only have trailing '*' left.
    while (*wild == '*') {
        ++wild;
    }
    return !*wild;
}
Thanks again for the inspiration.
|
|
|
|
|

|
Anyone tried converting this to using wchar_t* (essentially Unicode) instead of char*?
|
|
|
|

|
This is the version of the wildcmp function in XBLite programming language:
FUNCTION SBYTE wildcmp( wildcard$, search$)
' wildcmp(const char *wild, const char *string)
' Written by Jack Handy - jakkhandy@hotmail.com
'
' XBLite port of the C routine named above. Matches search$ against the
' pattern wildcard$, where * stands for any run of characters and ? for
' exactly one character.
' Returns $$TRUE from the trailing-star shortcut, $$FALSE on a mismatch or
' when either argument is empty, otherwise the value of the final test on
' the pattern position.
' NOTE(review): because of the two IFZ guards an empty search$ never
' matches, not even against a lone star; this differs from the C routine
' and should be confirmed as intended.
'
' cp : position in the text to resume from after a failed attempt
' mp : position in the pattern right after the most recent star
ULONG cp
ULONG mp
STRING s_txt$
' sp : current position in the text copy
ULONG sp
STRING w_txt$
' wp : current position in the pattern copy
ULONG wp
IFZ search$ THEN RETURN $$FALSE
IFZ wildcard$ THEN RETURN $$FALSE
' Work on copies with two extra zero bytes appended, presumably so the
' brace indexing below can read a terminator past the last character.
w_txt$ = wildcard$ + "\0\0" ' Just to be sure
s_txt$ = search$ + "\0\0"
' Phase 1: everything before the first star must match position by position.
DO WHILE (s_txt${sp}) && (w_txt${wp} != '*')
IF (w_txt${wp} != s_txt${sp} ) && (w_txt${wp} != '?') THEN RETURN $$FALSE
INC wp
INC sp
LOOP
' Phase 2: match the rest, going back to the last star on a mismatch.
DO WHILE (s_txt${sp})
IF ( w_txt${wp} == '*' ) THEN
INC wp
' A star at the very end of the pattern matches whatever text is left.
IF !(w_txt${wp}) THEN RETURN $$TRUE
mp = wp
cp = sp + 1
ELSE
IF (w_txt${wp} == s_txt${sp} ) || (w_txt${wp} == '?') THEN
INC wp
INC sp
ELSE
' Mismatch: restart after the last star, one text position further on.
wp = mp
sp = cp
' Only advance cp while it still points at a non-zero byte, so it
' cannot run past the appended terminator.
IF s_txt${sp} THEN INC cp
ENDIF
ENDIF
LOOP
' Phase 3: the text is used up; skip any stars left at the pattern end.
DO WHILE (w_txt${wp} == '*' )
INC wp
LOOP
' Result is the negation of the byte at wp, i.e. a match only if the
' pattern is used up too.
RETURN !w_txt${wp}
END FUNCTION
|
|
|
|

|
I had converted the wildcmp to C#, it's very easy to wildcard string, thanks so much.
/// <summary>
/// Matches <paramref name="strEmail"/> against the wildcard pattern
/// <paramref name="strWild"/> ('*' = any run of characters, '?' = one character).
/// Character access and end-of-string detection are delegated to the
/// ValueAt and ValueIsNullOrEmpty helpers.
/// </summary>
/// <returns>true when the text matches the pattern.</returns>
bool WildCompare(string strWild, string strEmail)
{
    int wildIndex = 0;
    int emailIndex = 0;
    int resumeWild = 0;   // pattern index right after the most recent '*'
    int resumeEmail = 0;  // text index to resume from after a failed attempt

    // Phase 1: the part of the pattern before the first '*' must match in lockstep.
    for (; ; wildIndex++, emailIndex++)
    {
        if (ValueIsNullOrEmpty(strEmail, emailIndex))
        {
            break;
        }
        if (ValueAt(strWild, wildIndex) == '*')
        {
            break;
        }
        if ((ValueAt(strWild, wildIndex) != ValueAt(strEmail, emailIndex)) && (ValueAt(strWild, wildIndex) != '?'))
        {
            return false;
        }
    }

    // Phase 2: match the remainder, falling back to the last '*' on a mismatch.
    while (!ValueIsNullOrEmpty(strEmail, emailIndex))
    {
        if (ValueAt(strWild, wildIndex) == '*')
        {
            wildIndex++;
            if (ValueIsNullOrEmpty(strWild, wildIndex))
            {
                // A trailing '*' accepts whatever text is left.
                return true;
            }
            resumeWild = wildIndex;
            resumeEmail = emailIndex + 1;
            continue;
        }

        if (ValueAt(strWild, wildIndex).Equals(ValueAt(strEmail, emailIndex)) || (ValueAt(strWild, wildIndex) == '?'))
        {
            wildIndex++;
            emailIndex++;
            continue;
        }

        // Mismatch: let the last '*' absorb one more character and retry.
        wildIndex = resumeWild;
        emailIndex = resumeEmail;
        resumeEmail++;
    }

    // Phase 3: text exhausted; skip any '*' left at the end of the pattern.
    for (; ValueAt(strWild, wildIndex) == '*'; wildIndex++)
    {
    }

    return ValueIsNullOrEmpty(strWild, wildIndex);
}
|
|
|
|

|
Well, as direct as I could come up with anyway. Makes use of unsafe to enable pointer arithmetic. Unfortunately, because fixed is required to prevent the GC from moving the pointers, I had to change it to use increment indexers instead of directly manipulating the pointers. Alternatively, you could use stackalloc to instantiate two native char[]'s and copy the values, but that seems contrary to this function's low-memory footprint, high performance goals. Has been tested against every test case presented in the comments section as well as some additional cases I threw in. public unsafe static bool GlobCompare( string glob, string path ) { fixed ( char* pGlob = glob, pPath = path ) { int pGlobInc = 0; int pPathInc = 0; int mp = 0; int cp = 0; while ( ( *( pPath + pPathInc ) != 0 ) && ( *( pGlob + pGlobInc ) != '*' ) ) { if ( ( *( pGlob + pGlobInc ) != *( pPath + pPathInc ) ) && ( *( pGlob + pGlobInc ) != '?' ) ) { return false; } pGlobInc++; pPathInc++; } while ( *( pPath + pPathInc ) != 0 ) { if ( *( pGlob + pGlobInc ) == '*' ) { if ( 0 == *( pGlob + ++pGlobInc ) ) { return true; } mp = pGlobInc; cp = pPathInc + 1; } else if ( ( *( pGlob + pGlobInc ) == *( pPath + pPathInc ) ) || ( *( pGlob + pGlobInc ) == '?' ) ) { pGlobInc++; pPathInc++; } else { pGlobInc = mp; pPathInc = cp++; } } while ( *( pGlob + pGlobInc ) == '*' ) { pGlobInc++; } return ( 0 == *( pGlob + pGlobInc ) ); } }
|
|
|
|
|

|
I am using this in Artistic Style, a popular multi-platform code formatter available at SourceForge.
http://astyle.sourceforge.net/
Release 1.22 added directory recursion to the project. Wildcard processing was made internal to the program. Linux has a glob function but Windows doesn't. I just used this for both of them. It let me process both platforms in a similar manner.
A minor change was made for Windows to make the comparison case insensitive. Linux was left case sensitive.
Thanks for making it available. Using this was a lot easier than writing my own. I doubt that mine would have been this sophisticated.
|
|
|
|

|
Boy do I feel stupid. I worked on an algorithm like this for days, and never got it quite right. Then, I see the wonderful, and simplistic work of someone like this, and it reminds me that sometimes we all are guilty of 'over-engineering'...
Thanks Mr. Handy!
|
|
|
|

|
How can this code be converted to do a replace? I need to provide a find/replace dialog in an application and I don't want to jump through the hoops of the Boost library. Can anyone help?
Patrick
|
|
|
|

|
Here's RegExp version (may be easily ported to C++).
Pros: More readable, Relies on proven RegExp
Cons: Maybe slower?, If eval string contains RegExp keywords then it might result in unexpected result
/// <summary>
/// Tests whether <paramref name="eval"/> matches the wildcard
/// <paramref name="pattern"/>, where '*' matches any run of characters
/// (including none) and '?' matches exactly one character. Every other
/// character in the pattern, including regular-expression metacharacters
/// such as '+', '(' or '[', is matched literally.
/// </summary>
/// <param name="eval">The text to test.</param>
/// <param name="pattern">The wildcard pattern.</param>
/// <param name="caseSensitive">false to ignore case (culture-invariant).</param>
/// <returns>true when the whole text matches the whole pattern.</returns>
/// <exception cref="System.ArgumentNullException">
/// <paramref name="eval"/> or <paramref name="pattern"/> is null.
/// </exception>
public static bool Match(string eval, string pattern, bool caseSensitive)
{
    if (eval == null)
    {
        throw new System.ArgumentNullException("eval");
    }
    if (pattern == null)
    {
        throw new System.ArgumentNullException("pattern");
    }

    // Escape the complete pattern first so that no character is interpreted as
    // regex syntax, then re-enable the two wildcards, which Regex.Escape has
    // turned into \? and \*. The result is always a valid expression, so no
    // catch-all exception handler is needed.
    string regex = @"\A"
        + Regex.Escape(pattern).Replace(@"\?", ".").Replace(@"\*", ".*")
        + @"\z";

    // Singleline lets the wildcards match line breaks as well.
    RegexOptions options = RegexOptions.Singleline;
    if (!caseSensitive)
    {
        options |= RegexOptions.IgnoreCase | RegexOptions.CultureInvariant;
    }

    return Regex.IsMatch(eval, regex, options);
}
|
|
|
|

|
This is tight and clever. Thanks for sharing it.
|
|
|
|

|
Hi, wildcmp("*<*>", "<field1><field2>") returns 1, while I think it should return 0 (I may be wrong, so please tell me). If someone knows how to fix it, I would appreciate it. Regards
|
|
|
|

|
I think it's better to make the function return a bool value. After all, many string comparison functions return 0 when the strings are equal.
|
|
|
|

|
If wild = "*?.abc" and str = "abc.abc",
wildcmp(wild, str) does not work,
but if wild = "?*.abc" and str = "abc.abc",
wildcmp(wild, str) does work.
Does anyone have any idea about this case?
|
|
|
|

|
Simple, fast, useful, AND fun to figure out.
Well done.
|
|
|
|

|
I got the overall flow of the program, but I didn't completely get the logic of the second loop. I understand that the second loop checks whether there is anything after the *: if there is nothing, it is a match, but if there is something, it stores the positions in the two pointers and then goes on.
also in the final else it goes like else
{
wild = mp;
string = cp++;
}
am sorry but am not getting the logic totally.
can someone please explain?
|
|
|
|
|

|
Hello,
I think this post is very interesting because the code is very simple and does a very cool job!
BUT!
I don't understand why you use 3 loops to do it.
I think I am not seeing every case, because to me it looks as if the 2nd loop alone does all the work?
I'm trying to understand the whole process so that I can add an optional character with the ^ escape sequence, for example: ^-* matches -12 or 12
Thanks
|
|
|
|

|
Hi, i have a stupid question, could someone give me the c# version
thanks in advance
|
|
|
|

|
most C compare functions return zero when the values are equal, but this function returns non-zero.
Personally, I find the non-zero to be more intuitive .. but after years of forcing myself to check for zero I find it a bit counter-intuitive.
I think I'll just rename the function when I add it to my library
But that certainly wont stop me from using this wonderful routine.
Many sincere thanks ...
|
|
|
|

|
This is really nice and useful code. I once wrote something similar, but your example is simpler and shorter.
Because it lacks comments, I spent some time understanding it (before I saw the comment from Targys - a real tutorial), and it is clear now. Thanks to both of you!
To 'wise' guys, flamers, and other people who have nothing better to do than argue:
- If the code has a bug, report but don't pretend you are a genius or a guru. If you can do it better, submit an article.
If you don't like the code, don't use it!
And about NULL pointers:
Idiot-proofing should be implemented at the level where data (function arguments) is acquired and prepared, not in such low-level function.
Besides that, I tested several functions from string.h with NULL parameters and every single one threw an exception. No further comments...
Regards, Voja
|
|
|
|

|
Great piece of code, but I have one minor improvement. It appears to me that the variable "cp" doesn't do anything and serves no purpose.
If I'm correct, then you can safely remove the line:
cp = string+1;
and also remove:
string = cp;
and replace:
cp = string++;
with:
++string;
I believe the results would be identical.
|
|
|
|

|
Just a thought:
the PathMatchSpec SHLWAPI function could provide something similar. I guess it does have some differences (e.g. allowing you to specify multiple specs, separated by semicolons), but it might be a simple alternative for many similar tasks.
we are here to help each other get through this thing, whatever it is Vonnegut jr.
sighist || Agile Programming | doxygen
|
|
|
|

|
Tried these wildcards, and they show different results in your code and in Windows Explorer's search command.
??x*
*so*
??so*
??so??
Lack of comments in the code also make it a bit difficult to understand. On the whole however, good job!
Bikram
|
|
|
|

|
I want case insenstive wildcmp function. Could anyone help me?
|
|
|
|

|
Very nice, compact, works great and is very fast. Thanks, Jack!
Best wishes,
Hans
|
|
|
|

|
I've seen a few people in these boards complain that I didn't check for null pointers in this function. This is a C function and the last time I checked, passing NULL to strcmp or any other C string function will segfault. I'm not saying this is great, and if you wanted to add a check for null, that would be fine. I just don't think that this is a 'bug' (if you can even call it that) worth flaming an otherwise great function.
-Jack
There are 10 types of people in this world, those that understand binary and those who don't.
|
|
|
|

|
Great code, but if I'm not mistaken,
cp can point beyond the bounds of the string array.
try: wild="*a", string="xyzab"
correction:
string = cp++;
should be changed to:
string = cp;
if(*cp) cp++;
|
|
|
|

|
I saw this one a long time ago, and finally have a use for it. Thank you very much.
Chris Richardson
Programmers find all sorts of ingenious ways to screw ourselves over. - Tim Smith
|
|
|
|

|
I converted this into C# and bingo...
I tried to break it but couldn't
|
|
|
|
|

|
Could anyone explain to me how this code works? I am having trouble figuring out what is going on in a couple of places. I think a short explanation would also help other people like me who don't know C. Thanks in advance!
|
|
|
|

|
why use local variables and so many loops? it can be much easier to match two strings. i don't understand why so many people spend hours to search the web for wildcard matching when they can write it themselves in 5 minutes time??! the code below could be shorter but it's easier to read like this. i didn't debug it very much but it will work, though. // ------------------------------------------------------------------- int wildcmp(const char* wild, const char* string) // ------------------------------------------------------------------- { if(*wild == *string) return '\0' == *string || wildcmp(++wild, ++string); if('\0' == *string) return '*' == *wild && wildcmp(++wild, string); switch(*wild) { case '?': return wildcmp(++wild, ++string); case '*': wild++; if('\0' == *wild) return 1; while(*string != '\0') if(wildcmp(wild, string++)) return 1; default: return 0; } } yours, the c++ guru himself.
|
|
|
|

|
Thanks for the code! I was searching the web for a simple wildcard matching routine ... I don't want full blown regular expressions, I just want to do simple wildcards against a list of filenames. This is perfect!
|
|
|
|

|
I found this very useful. thx!
Todd Smith
|
|
|
|

|
I ran the following test cases through wildcmp, all successful. (The test() function compares the pattern to the input string, and compares against the expected result).
test( "", "", true );
test( "*", "", true );
test( "*", "A", true );
test( "", "A", false );
test( "A*", "", false );
test( "A*", "AAB", true );
test( "A*", "BAA", false );
test( "A*", "A", true );
test( "A*B", "", false );
test( "A*B", "AAB", true );
test( "A*B", "AB", true );
test( "A*B", "AABA", false );
test( "A*B", "ABAB", true );
test( "A*B", "ABBBB", true );
test( "A*B*C", "", false );
test( "A*B*C", "ABC", true );
test( "A*B*C", "ABCC", true );
test( "A*B*C", "ABBBC", true );
test( "A*B*C", "ABBBBCCCC", true );
test( "A*B*C", "ABCBBBCBCCCBCBCCCC", true );
test( "A*B*", "AB", true );
test( "A*B*", "AABA", true );
test( "A*B*", "ABAB", true );
test( "A*B*", "ABBBB", true );
test( "A*B*C*", "", false );
test( "A*B*C*", "ABC", true );
test( "A*B*C*", "ABCC", true );
test( "A*B*C*", "ABBBC", true );
test( "A*B*C*", "ABBBBCCCC", true );
test( "A*B*C*", "ABCBBBCBCCCBCBCCCC", true );
test( "A?", "AAB", false );
test( "A?B", "AAB", true );
test( "A?*", "A", false );
test( "A?*", "ABBCC", true );
test( "A?*", "BAA", false );
Paul McGuire
KLA-Tencor/Process Analysis & Control Division
Austin, TX
|
|
|
|

|
Do you have a regex formatter/matcher source code. Please reply, Thanks!
|
|
|
|

|
This is EXACTLY what I have been looking for! I have been searching the web for a simple wildcard matcher, and all I have found is regex after regex of source code. I even started reading a regex tutorial. Why waste time learning regex and bloating my program with 20k of regex code I'll never use when 20 lines of pure bliss can be inserted? Thanks!!!!!
|
|
|
|

|
wildcmp("fold","??*"); should match.
This message has been virus scanned
|
|
|
|

|
Under DOS / Windows following line should return positive result
wildcmp("*.*", "aaaaa")
It would be nice if you would add this feature too
Miroslav Rajcic
http://www.spacetide.com
|
|
|
|
 |
|
|
General News Suggestion Question Bug Answer Joke Rant Admin
|
Matches a string against a wildcard string such as "*.*" or "bl?h.*" etc. This is good for file globbing or to match hostmasks.
| Type | Article |
| Licence | |
| First Posted | 1 May 2001 |
| Views | 683,118 |
| Bookmarked | 89 times |
|
|