// strstr_SHORT-SHOWDOWN.c, revision 6+++, 2011 Sep 07.
// homepage: www.sanmayce.com/Railgun/index.html
// 1] A BUG (which appeared in r.6++) crushed:
// Fix #1: The actual fix for r.6+++:
// //countSTATIC = cbPattern-2-3;
// countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfitable in code cache lines?!
// Fix #2: One possible fix for r.6+++:
// // Next line fixes the BUG from r.6++: but with awful speed degradation! So the bug is fixed in the definitions by setting 'countSTATIC = cbPattern-2-2;', bug appears only for patterns with lengths of 4, The setback is one unnecessary comparison for 5bytes patterns, stupidly such setback exists (from before) for 4bytes as well.
// //if (countSTATIC<0) countSTATIC=0;
// Fix #3: Another possible fix for r.6+++:
// // A BUG (in next line) crushed from r.6++: 'count !=0' becomes 'count >0' in r.6+++ but with awful speed degradation! So the bug is fixed outside the cycles by setting 'countSTATIC' from -1 to 0, bug appears only for patterns with lengths of 4.
// while ( count !=0 && *(char *)(pbPattern+1+3+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+3+(countSTATIC-count)) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
/*
The bug (appearing in BMH section only with pattern lengths of 4) which I overlooked comes in handy, it shows something important (as far as I understand), namely the significance of one instruction (2bytes of code)!
Disastrous degradation (1209KB/clock down to 910KB/clock) just from 'count !=0' replacement with 'count >0', how is this possible, maybe it is related to some microcode cache limits?!
I should surely like to hear (at sanmayce@sanmayce.com) from an expert what causes this 300MB difference.
For r.6++ (buggy) the 'while' code is 11 bytes:
$LL3@Railgun_Qu@2:
009d2 8a 16 mov dl, BYTE PTR [esi]
009d4 3a 11 cmp dl, BYTE PTR [ecx]
009d6 75 05 jne SHORT $LN68@Railgun_Qu@2
009d8 41 inc ecx
009d9 46 inc esi
009da 48 dec eax
009db 75 f5 jne SHORT $LL3@Railgun_Qu@2
$LN68@Railgun_Qu@2:
For r.6+++ (fixed) the 'while' code is 11+2 bytes:
$LL3@Railgun_Qu@2:
009d2 8a 16 mov dl, BYTE PTR [esi]
009d4 3a 11 cmp dl, BYTE PTR [ecx]
009d6 75 0f jne SHORT $LN2@Railgun_Qu@2
009d8 48 dec eax
009d9 41 inc ecx
009da 46 inc esi
009db 85 c0 test eax, eax
009dd 7f f3 jg SHORT $LL3@Railgun_Qu@2
$LN59@Railgun_Qu@2:
Fragment r.6++ (buggy) [
strstr_SHORT-SHOWDOWN_Microsoft_v16_Ox.exe fly Dumbo fly
strstr_SHORT-SHOWDOWN, revision 6++, written by Kaze.
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun_6pp performance: 645KB/clock
Found ('to') 1076629 time(s), Railgun_6pp performance: 678KB/clock
Found ('TDK') 0 time(s), Railgun_6pp performance: 1167KB/clock
Found ('the') 2114180 time(s), Railgun_6pp performance: 678KB/clock
Found ('fast') 5945 time(s), Railgun_6pp performance: 918KB/clock
Found ('easy') 5191 time(s), Railgun_6pp performance: 990KB/clock
Found ('grmbl') 0 time(s), Railgun_6pp performance: 1287KB/clock
Found ('email') 1 time(s), Railgun_6pp performance: 1167KB/clock
Found ('pasting') 2 time(s), Railgun_6pp performance: 1422KB/clock
Found ('amazing') 323 time(s), Railgun_6pp performance: 1603KB/clock
Found ('underdog') 4 time(s), Railgun_6pp performance: 1836KB/clock
Found ('superdog') 0 time(s), Railgun_6pp performance: 1603KB/clock
Found ('participants') 147 time(s), Railgun_6pp performance: 2557KB/clock
Found ('skillessness') 0 time(s), Railgun_6pp performance: 2126KB/clock
Found ('I should have known') 1 time(s), Railgun_6pp performance: 2126KB/clock
Found ('human consciousness') 519 time(s), Railgun_6pp performance: 2149KB/clock
Railgun_6pp 8x2 i.e. average performance: 1209KB/clock
Fragment r.6++ (buggy) ]
Fragment r.6+++ (fixed) with either Fix #2 or Fix #3 [
strstr_SHORT-SHOWDOWN_Microsoft_v16_Ox.exe fly Dumbo fly
strstr_SHORT-SHOWDOWN, revision 6+++, written by Kaze.
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun_6pp performance: 678KB/clock
Found ('to') 1076629 time(s), Railgun_6pp performance: 678KB/clock
Found ('TDK') 0 time(s), Railgun_6pp performance: 1167KB/clock
Found ('the') 2114180 time(s), Railgun_6pp performance: 643KB/clock
Found ('fast') 5945 time(s), Railgun_6pp performance: 587KB/clock
Found ('easy') 5191 time(s), Railgun_6pp performance: 614KB/clock
Found ('grmbl') 0 time(s), Railgun_6pp performance: 805KB/clock
Found ('email') 1 time(s), Railgun_6pp performance: 713KB/clock
Found ('pasting') 2 time(s), Railgun_6pp performance: 990KB/clock
Found ('amazing') 323 time(s), Railgun_6pp performance: 990KB/clock
Found ('underdog') 4 time(s), Railgun_6pp performance: 1167KB/clock
Found ('superdog') 0 time(s), Railgun_6pp performance: 1074KB/clock
Found ('participants') 147 time(s), Railgun_6pp performance: 1603KB/clock
Found ('skillessness') 0 time(s), Railgun_6pp performance: 1422KB/clock
Found ('I should have known') 1 time(s), Railgun_6pp performance: 1603KB/clock
Found ('human consciousness') 519 time(s), Railgun_6pp performance: 1603KB/clock
Railgun_6pp 8x2 i.e. average performance: 910KB/clock
Fragment r.6+++ (fixed) with either Fix #2 or Fix #3 ]
Fragment r.6+++ (fixed) with Fix #1 on Microsoft v16 [
strstr_SHORT-SHOWDOWN_Microsoft_v16_Ox.exe fly Dumbo fly
strstr_SHORT-SHOWDOWN, revision 6+++, written by Kaze.
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun_6pp performance: 678KB/clock
Found ('to') 1076629 time(s), Railgun_6pp performance: 678KB/clock
Found ('TDK') 0 time(s), Railgun_6pp performance: 1167KB/clock
Found ('the') 2114180 time(s), Railgun_6pp performance: 716KB/clock
Found ('fast') 5945 time(s), Railgun_6pp performance: 918KB/clock
Found ('easy') 5191 time(s), Railgun_6pp performance: 990KB/clock
Found ('grmbl') 0 time(s), Railgun_6pp performance: 1287KB/clock
Found ('email') 1 time(s), Railgun_6pp performance: 1167KB/clock
Found ('pasting') 2 time(s), Railgun_6pp performance: 1433KB/clock
Found ('amazing') 323 time(s), Railgun_6pp performance: 1603KB/clock
Found ('underdog') 4 time(s), Railgun_6pp performance: 1820KB/clock
Found ('superdog') 0 time(s), Railgun_6pp performance: 1836KB/clock
Found ('participants') 147 time(s), Railgun_6pp performance: 2557KB/clock
Found ('skillessness') 0 time(s), Railgun_6pp performance: 2126KB/clock
Found ('I should have known') 1 time(s), Railgun_6pp performance: 2149KB/clock
Found ('human consciousness') 519 time(s), Railgun_6pp performance: 2525KB/clock
Railgun_6pp 8x2 i.e. average performance: 1213KB/clock
Fragment r.6+++ (fixed) with Fix #1 on Microsoft v16 ]
Fragment r.6+++ (fixed) with Fix #1 on Intel v12 fastest result [
strstr_SHORT-SHOWDOWN_Intel_v12_O3_Qparallel.exe fly Dumbo fly
strstr_SHORT-SHOWDOWN, revision 6+++, written by Kaze.
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun_6pp performance: 537KB/clock
Found ('to') 1076629 time(s), Railgun_6pp performance: 537KB/clock
Found ('TDK') 0 time(s), Railgun_6pp performance: 585KB/clock
Found ('the') 2114180 time(s), Railgun_6pp performance: 445KB/clock
Found ('fast') 5945 time(s), Railgun_6pp performance: 918KB/clock
Found ('easy') 5191 time(s), Railgun_6pp performance: 990KB/clock
Found ('grmbl') 0 time(s), Railgun_6pp performance: 1287KB/clock
Found ('email') 1 time(s), Railgun_6pp performance: 1167KB/clock
Found ('pasting') 2 time(s), Railgun_6pp performance: 1603KB/clock
Found ('amazing') 323 time(s), Railgun_6pp performance: 1603KB/clock
Found ('underdog') 4 time(s), Railgun_6pp performance: 1820KB/clock
Found ('superdog') 0 time(s), Railgun_6pp performance: 1836KB/clock
Found ('participants') 147 time(s), Railgun_6pp performance: 2126KB/clock
Found ('skillessness') 0 time(s), Railgun_6pp performance: 2126KB/clock
Found ('I should have known') 1 time(s), Railgun_6pp performance: 2149KB/clock
Found ('human consciousness') 519 time(s), Railgun_6pp performance: 2525KB/clock
Railgun_6pp 8x2 i.e. average performance: 1029KB/clock
Fragment r.6+++ (fixed) with Fix #1 on Intel v12 fastest result ]
*/
// One thing changed in r.6++:
// 1] Instead of 1 byte the comparison in BM section is now 4 bytes.
// Two things changed in r.6+:
// 1] countSTATIC (which is equal to cbPattern-1) was removed (replaced by cbPattern-1) in case of <961, it results in faster 'frustration' pattern execution than the GNU variant.
// 2] case <4 crippled.
/*
D:\_KAZE_new-stuff\KAZE_strstr_SHORT-SHOWDOWN_r6+>cl /Ox /Tcstrstr_SHORT-SHOWDOWN.c /Fastrstr_SHORT-SHOWDOWN /w /FAcs
Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 16.00.30319.01 for 80x86
Copyright (C) Microsoft Corporation. All rights reserved.
strstr_SHORT-SHOWDOWN.c
Microsoft (R) Incremental Linker Version 10.00.30319.01
Copyright (C) Microsoft Corporation. All rights reserved.
/out:strstr_SHORT-SHOWDOWN.exe
strstr_SHORT-SHOWDOWN.obj
D:\_KAZE_new-stuff\KAZE_strstr_SHORT-SHOWDOWN_r6+>strstr_SHORT-SHOWDOWN.exe
strstr_SHORT-SHOWDOWN, revision 6+, written by Kaze.
Input Pattern(up to 19+2000 chars): but
Doing Search for Pattern(3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 147336/945
strstr_Microsoft performance: 204KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 147336/513
strstr_GNU_C_Library performance: 376KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 147336/600
Railgun performance: 321KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 147336/520
Railgun_Quadruplet performance: 370KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
KarpRabinKaze_BOOSTED_hits/KarpRabinKaze_BOOSTED_clocks: 147336/585
KarpRabinKaze_BOOSTED performance: 329KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
KarpRabinKaze_hits/KarpRabinKaze_clocks: 147336/598
KarpRabinKaze performance: 322KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Karp_Rabin_hits/Karp_Rabin_clocks: 147336/698
Karp_Rabin performance: 276KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Brute_Force_Dummy_hits/Brute_Force_Dummy_clocks: 147336/776
Brute_Force_Dummy performance: 248KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Boyer-Moore-Horspool_hits/Boyer-Moore-Horspool_clocks: 147336/831
Boyer-Moore-Horspool performance: 232KB/clock
StrnglenTRAVERSED: 197529022 bytes
LinesEncountered: 2459508
Boyer_Moore_Horspool_Kaze_hits/Boyer_Moore_Horspool_Kaze_clocks: 147336/807
Boyer_Moore_Horspool_Kaze performance: 239KB/clock
StrnglenTRAVERSED: 197529022 bytes
Doing Search for Pattern(3bytes) into String(206908949bytes) as-one-line ...
Railgun_hits/Railgun_clocks: 151493/449
Railgun performance: 450KB/clock
DUMBO 8x2 ...
Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 1212509/803
strstr_Microsoft performance: 168KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1212509/503
strstr_GNU_C_Library performance: 268KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 1212509/545
Railgun performance: 248KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 1212509/429
Railgun_Quadruplet performance: 315KB/clock
StrnglenTRAVERSED: 138478024 bytes
Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 780175/916
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 780175/556
strstr_GNU_C_Library performance: 288KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 780175/575
Railgun performance: 279KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 780175/460
Railgun_Quadruplet performance: 349KB/clock
StrnglenTRAVERSED: 164505415 bytes
Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/947
strstr_Microsoft performance: 210KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/501
strstr_GNU_C_Library performance: 398KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/604
Railgun performance: 330KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/470
Railgun_Quadruplet performance: 424KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 1192002/826
strstr_Microsoft performance: 160KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1192002/536
strstr_GNU_C_Library performance: 247KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 1192002/524
Railgun performance: 253KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 1192002/465
Railgun_Quadruplet performance: 285KB/clock
StrnglenTRAVERSED: 135882884 bytes
Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 5384/964
strstr_Microsoft performance: 206KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 5384/516
strstr_GNU_C_Library performance: 386KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 5384/603
Railgun performance: 330KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 5384/501
Railgun_Quadruplet performance: 398KB/clock
StrnglenTRAVERSED: 204186782 bytes
Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 4825/1142
strstr_Microsoft performance: 174KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4825/690
strstr_GNU_C_Library performance: 289KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 4825/606
Railgun performance: 329KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 4825/664
Railgun_Quadruplet performance: 300KB/clock
StrnglenTRAVERSED: 204202166 bytes
Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/969
strstr_Microsoft performance: 206KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/522
strstr_GNU_C_Library performance: 382KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/598
Railgun performance: 333KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/502
Railgun_Quadruplet performance: 397KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/1139
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/681
strstr_GNU_C_Library performance: 293KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 1/606
Railgun performance: 329KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 1/662
Railgun_Quadruplet performance: 301KB/clock
StrnglenTRAVERSED: 204449414 bytes
Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 2/964
strstr_Microsoft performance: 207KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 2/516
strstr_GNU_C_Library performance: 386KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 2/590
Railgun performance: 338KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 2/493
Railgun_Quadruplet performance: 404KB/clock
StrnglenTRAVERSED: 204449363 bytes
Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 319/1061
strstr_Microsoft performance: 188KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 319/606
strstr_GNU_C_Library performance: 329KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 319/591
Railgun performance: 337KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 319/574
Railgun_Quadruplet performance: 347KB/clock
StrnglenTRAVERSED: 204432134 bytes
Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 4/998
strstr_Microsoft performance: 200KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 4/545
strstr_GNU_C_Library performance: 366KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 4/588
Railgun performance: 339KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 4/513
Railgun_Quadruplet performance: 389KB/clock
StrnglenTRAVERSED: 204449185 bytes
Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1043
strstr_Microsoft performance: 191KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/591
strstr_GNU_C_Library performance: 337KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/589
Railgun performance: 338KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/560
Railgun_Quadruplet performance: 356KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 141/965
strstr_Microsoft performance: 206KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 141/516
strstr_GNU_C_Library performance: 386KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 141/577
Railgun performance: 346KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 141/481
Railgun_Quadruplet performance: 415KB/clock
StrnglenTRAVERSED: 204441500 bytes
Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1039
strstr_Microsoft performance: 192KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/589
strstr_GNU_C_Library performance: 338KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/586
Railgun performance: 340KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/550
Railgun_Quadruplet performance: 363KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 1/958
strstr_Microsoft performance: 208KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 1/507
strstr_GNU_C_Library performance: 393KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 1/556
Railgun performance: 359KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 1/455
Railgun_Quadruplet performance: 438KB/clock
StrnglenTRAVERSED: 204449346 bytes
Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 514/1033
strstr_Microsoft performance: 193KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 514/580
strstr_GNU_C_Library performance: 344KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 514/561
Railgun performance: 355KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 514/513
Railgun_Quadruplet performance: 389KB/clock
StrnglenTRAVERSED: 204422699 bytes
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun performance: 293KB/clock
Found ('to') 1076629 time(s), Railgun performance: 293KB/clock
Found ('TDK') 0 time(s), Railgun performance: 495KB/clock
Found ('the') 2114180 time(s), Railgun performance: 391KB/clock
Found ('fast') 5945 time(s), Railgun performance: 559KB/clock
Found ('easy') 5191 time(s), Railgun performance: 614KB/clock
Found ('grmbl') 0 time(s), Railgun performance: 759KB/clock
Found ('email') 1 time(s), Railgun performance: 713KB/clock
Found ('pasting') 2 time(s), Railgun performance: 922KB/clock
Found ('amazing') 323 time(s), Railgun performance: 985KB/clock
Found ('underdog') 4 time(s), Railgun performance: 1174KB/clock
Found ('superdog') 0 time(s), Railgun performance: 1069KB/clock
Found ('participants') 147 time(s), Railgun performance: 1422KB/clock
Found ('skillessness') 0 time(s), Railgun performance: 1433KB/clock
Found ('I should have known') 1 time(s), Railgun performance: 1603KB/clock
Found ('human consciousness') 519 time(s), Railgun performance: 1603KB/clock
Railgun 8x2 i.e. average performance: 669KB/clock
D:\_KAZE_new-stuff\KAZE_strstr_SHORT-SHOWDOWN_r6+>
*/
// strstr_SHORT-SHOWDOWN.c, revision 6, 2010 Dec 22.
// Home of Railgun_Quadruplet: www.sanmayce.com/Railgun
// In previous revisions invocation of GNU strstr were with an overhead: 2 parameters not needed, sorry, it was unintentionally.
// Wow, another catastrophe of mine,
// Karp-Rabin-Kaze appears to be inferior compared to GNU strstr.
// Back to school... no, no, back to kindergarten!
// Karp-Rabin-Kaze bends a knee before strstr_GNU_C_Library, bravo Stephen R. van den Berg, I will have fun reading-understanding-learning it, for sure, thanks.
// Here a lite testbed(OSHO.TXT) is set, the other(26GB with 400+ million lines) will be tested off-line later, he-he.
// Thanks a lot also to,
// http://www-igm.univ-mlv.fr/~lecroq/string/index.html
// amazing site and GREAT/RICH resource on search technique.
// Revision 5: What a dumbo-tester I am, to test only with one(from 2002) C compiler, the damage is undone, here is:
// Intel(R) C++ Compiler Professional for applications running on IA-32, Version 11.1(from 2010) along with
// Microsoft (R) 32-bit C/C++ Optimizing Compiler Version 13.10.3077 for 80x86.
// Compile line: icl /O3 /Qparallel strstr_SHORT-SHOWDOWN.c
// Compile line: cl /Ox strstr_SHORT-SHOWDOWN.c
// I guess '/Qparallel' gives(don't know) a very little boost if any.
// Revision 6: Microsoft (R) Optimizing Compiler Version 16.00.30319.01 step in, from VS2010.
// Short bottomline: Railgun_Quadruplet is very good but needs to be boosted/refined!
// Note: I have some high hopes for Railgun_Quadruplet, especially for new architectures after Core 2 i.e. i3[+].
/*
[On Intel T3400 CPU: Microsoft 16.00.30319.01 vs Intel IA-32 11.1:]
Input file: OSHO.TXT
Number/Size of lines: 2,459,508/206,908,949
Shortest/Longest line: 0/161
[strstr_SHORT-SHOWDOWN_Microsoft_Ox.exe:]
Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 75781/786
strstr_Microsoft performance: 172KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 75781/483
strstr_GNU_C_Library performance: 279KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 75781/515
Railgun performance: 262KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 75781/509
Railgun_Quadruplet performance: 265KB/clock
StrnglenTRAVERSED: 138478024 bytes
Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 48760/896
strstr_Microsoft performance: 179KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 48760/540
strstr_GNU_C_Library performance: 297KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 48760/546
Railgun performance: 294KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 48760/540
Railgun_Quadruplet performance: 297KB/clock
StrnglenTRAVERSED: 164505415 bytes
Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/929
strstr_Microsoft performance: 214KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/485
strstr_GNU_C_Library performance: 411KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/588
Railgun performance: 339KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/584
Railgun_Quadruplet performance: 341KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 74500/807
strstr_Microsoft performance: 164KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 74500/518
strstr_GNU_C_Library performance: 256KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 74500/505
Railgun performance: 262KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 74500/503
Railgun_Quadruplet performance: 263KB/clock
StrnglenTRAVERSED: 135882884 bytes
Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 336/946
strstr_Microsoft performance: 210KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 336/503
strstr_GNU_C_Library performance: 396KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 336/588
Railgun performance: 339KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 336/504
Railgun_Quadruplet performance: 395KB/clock
StrnglenTRAVERSED: 204186782 bytes
Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 301/1123
strstr_Microsoft performance: 177KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 301/672
strstr_GNU_C_Library performance: 296KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 301/590
Railgun performance: 337KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 301/660
Railgun_Quadruplet performance: 302KB/clock
StrnglenTRAVERSED: 204202166 bytes
Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/953
strstr_Microsoft performance: 209KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/507
strstr_GNU_C_Library performance: 393KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/584
Railgun performance: 341KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/505
Railgun_Quadruplet performance: 395KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1120
strstr_Microsoft performance: 178KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/663
strstr_GNU_C_Library performance: 301KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/590
Railgun performance: 338KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/658
Railgun_Quadruplet performance: 303KB/clock
StrnglenTRAVERSED: 204449414 bytes
Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/947
strstr_Microsoft performance: 210KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/503
strstr_GNU_C_Library performance: 396KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/576
Railgun performance: 346KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/497
Railgun_Quadruplet performance: 401KB/clock
StrnglenTRAVERSED: 204449363 bytes
Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 19/1044
strstr_Microsoft performance: 191KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 19/589
strstr_GNU_C_Library performance: 338KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 19/577
Railgun performance: 345KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 19/575
Railgun_Quadruplet performance: 347KB/clock
StrnglenTRAVERSED: 204432134 bytes
Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/979
strstr_Microsoft performance: 203KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/530
strstr_GNU_C_Library performance: 376KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/573
Railgun performance: 348KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/518
Railgun_Quadruplet performance: 385KB/clock
StrnglenTRAVERSED: 204449185 bytes
Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1023
strstr_Microsoft performance: 195KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/575
strstr_GNU_C_Library performance: 347KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/575
Railgun performance: 347KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/561
Railgun_Quadruplet performance: 355KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 8/948
strstr_Microsoft performance: 210KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 8/503
strstr_GNU_C_Library performance: 396KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 8/561
Railgun performance: 355KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 8/485
Railgun_Quadruplet performance: 411KB/clock
StrnglenTRAVERSED: 204441500 bytes
Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1021
strstr_Microsoft performance: 195KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/573
strstr_GNU_C_Library performance: 348KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/567
Railgun performance: 352KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/549
Railgun_Quadruplet performance: 363KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/934
strstr_Microsoft performance: 213KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/490
strstr_GNU_C_Library performance: 407KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/540
Railgun performance: 369KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/456
Railgun_Quadruplet performance: 437KB/clock
StrnglenTRAVERSED: 204449346 bytes
Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 32/1010
strstr_Microsoft performance: 197KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 32/560
strstr_GNU_C_Library performance: 356KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 32/545
Railgun performance: 366KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 32/512
Railgun_Quadruplet performance: 389KB/clock
StrnglenTRAVERSED: 204422699 bytes
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun performance: 300KB/clock
Found ('to') 1076629 time(s), Railgun performance: 300KB/clock
Found ('TDK') 0 time(s), Railgun performance: 515KB/clock
Found ('the') 2114180 time(s), Railgun performance: 403KB/clock
Found ('fast') 5945 time(s), Railgun performance: 561KB/clock
Found ('easy') 5191 time(s), Railgun performance: 612KB/clock
Found ('grmbl') 0 time(s), Railgun performance: 805KB/clock
Found ('email') 1 time(s), Railgun performance: 716KB/clock
Found ('pasting') 2 time(s), Railgun performance: 990KB/clock
Found ('amazing') 323 time(s), Railgun performance: 990KB/clock
Found ('underdog') 4 time(s), Railgun performance: 1167KB/clock
Found ('superdog') 0 time(s), Railgun performance: 1074KB/clock
Found ('participants') 147 time(s), Railgun performance: 1603KB/clock
Found ('skillessness') 0 time(s), Railgun performance: 1422KB/clock
Found ('I should have known') 1 time(s), Railgun performance: 1603KB/clock
Found ('human consciousness') 519 time(s), Railgun performance: 1603KB/clock
Railgun 8x2 i.e. average performance: 682KB/clock
[strstr_SHORT-SHOWDOWN_Intel_O3_Qparallel.exe:]
Searching for Pattern('an',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 75781/917
strstr_Microsoft performance: 147KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 75781/503
strstr_GNU_C_Library performance: 268KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 75781/526
Railgun performance: 257KB/clock
StrnglenTRAVERSED: 138478024 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 75781/484
Railgun_Quadruplet performance: 279KB/clock
StrnglenTRAVERSED: 138478024 bytes
Searching for Pattern('to',2bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 48760/1050
strstr_Microsoft performance: 152KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 48760/562
strstr_GNU_C_Library performance: 285KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 48760/550
Railgun performance: 292KB/clock
StrnglenTRAVERSED: 164505415 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 48760/518
Railgun_Quadruplet performance: 310KB/clock
StrnglenTRAVERSED: 164505415 bytes
Searching for Pattern('TDK',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1120
strstr_Microsoft performance: 178KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/532
strstr_GNU_C_Library performance: 375KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/586
Railgun performance: 340KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/569
Railgun_Quadruplet performance: 350KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('the',3bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 74500/938
strstr_Microsoft performance: 141KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 74500/543
strstr_GNU_C_Library performance: 244KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 74500/507
Railgun performance: 261KB/clock
StrnglenTRAVERSED: 135882884 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 74500/488
Railgun_Quadruplet performance: 271KB/clock
StrnglenTRAVERSED: 135882884 bytes
Searching for Pattern('fast',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 336/1135
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 336/531
strstr_GNU_C_Library performance: 375KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 336/587
Railgun performance: 339KB/clock
StrnglenTRAVERSED: 204186782 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 336/521
Railgun_Quadruplet performance: 382KB/clock
StrnglenTRAVERSED: 204186782 bytes
Searching for Pattern('easy',4bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 301/1310
strstr_Microsoft performance: 152KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 301/709
strstr_GNU_C_Library performance: 281KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 301/588
Railgun performance: 339KB/clock
StrnglenTRAVERSED: 204202166 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 301/669
Railgun_Quadruplet performance: 298KB/clock
StrnglenTRAVERSED: 204202166 bytes
Searching for Pattern('grmbl',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1140
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/532
strstr_GNU_C_Library performance: 375KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/580
Railgun performance: 344KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/521
Railgun_Quadruplet performance: 383KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('email',5bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1307
strstr_Microsoft performance: 152KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/699
strstr_GNU_C_Library performance: 285KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/587
Railgun performance: 340KB/clock
StrnglenTRAVERSED: 204449414 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/666
Railgun_Quadruplet performance: 299KB/clock
StrnglenTRAVERSED: 204449414 bytes
Searching for Pattern('pasting',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1135
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/533
strstr_GNU_C_Library performance: 374KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/574
Railgun performance: 347KB/clock
StrnglenTRAVERSED: 204449363 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/513
Railgun_Quadruplet performance: 389KB/clock
StrnglenTRAVERSED: 204449363 bytes
Searching for Pattern('amazing',7bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 19/1230
strstr_Microsoft performance: 162KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 19/610
strstr_GNU_C_Library performance: 327KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 19/574
Railgun performance: 347KB/clock
StrnglenTRAVERSED: 204432134 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 19/586
Railgun_Quadruplet performance: 340KB/clock
StrnglenTRAVERSED: 204432134 bytes
Searching for Pattern('underdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1169
strstr_Microsoft performance: 170KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/549
strstr_GNU_C_Library performance: 363KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/570
Railgun performance: 350KB/clock
StrnglenTRAVERSED: 204449185 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/531
Railgun_Quadruplet performance: 376KB/clock
StrnglenTRAVERSED: 204449185 bytes
Searching for Pattern('superdog',8bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1212
strstr_Microsoft performance: 164KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/595
strstr_GNU_C_Library performance: 335KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/573
Railgun performance: 348KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/573
Railgun_Quadruplet performance: 348KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('participants',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 8/1137
strstr_Microsoft performance: 175KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 8/535
strstr_GNU_C_Library performance: 373KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 8/559
Railgun performance: 357KB/clock
StrnglenTRAVERSED: 204441500 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 8/499
Railgun_Quadruplet performance: 400KB/clock
StrnglenTRAVERSED: 204441500 bytes
Searching for Pattern('skillessness',12bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1212
strstr_Microsoft performance: 164KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/593
strstr_GNU_C_Library performance: 336KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/565
Railgun performance: 353KB/clock
StrnglenTRAVERSED: 204449441 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/558
Railgun_Quadruplet performance: 357KB/clock
StrnglenTRAVERSED: 204449441 bytes
Searching for Pattern('I should have known',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 0/1126
strstr_Microsoft performance: 177KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 0/534
strstr_GNU_C_Library performance: 373KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 0/537
Railgun performance: 371KB/clock
StrnglenTRAVERSED: 204449346 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 0/470
Railgun_Quadruplet performance: 424KB/clock
StrnglenTRAVERSED: 204449346 bytes
Searching for Pattern('human consciousness',19bytes) into String(206908949bytes) line-by-line ...
LinesEncountered: 2459508
strstr_Microsoft_hits/strstr_Microsoft_clocks: 32/1200
strstr_Microsoft performance: 166KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: 32/576
strstr_GNU_C_Library performance: 346KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_hits/Railgun_clocks: 32/543
Railgun performance: 367KB/clock
StrnglenTRAVERSED: 204422699 bytes
LinesEncountered: 2459508
Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: 32/522
Railgun_Quadruplet performance: 382KB/clock
StrnglenTRAVERSED: 204422699 bytes
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun performance: 461KB/clock
Found ('to') 1076629 time(s), Railgun performance: 495KB/clock
Found ('TDK') 0 time(s), Railgun performance: 859KB/clock
Found ('the') 2114180 time(s), Railgun performance: 585KB/clock
Found ('fast') 5945 time(s), Railgun performance: 922KB/clock
Found ('easy') 5191 time(s), Railgun performance: 985KB/clock
Found ('grmbl') 0 time(s), Railgun performance: 1287KB/clock
Found ('email') 1 time(s), Railgun performance: 1167KB/clock
Found ('pasting') 2 time(s), Railgun performance: 1603KB/clock
Found ('amazing') 323 time(s), Railgun performance: 1603KB/clock
Found ('underdog') 4 time(s), Railgun performance: 1836KB/clock
Found ('superdog') 0 time(s), Railgun performance: 1836KB/clock
Found ('participants') 147 time(s), Railgun performance: 2525KB/clock
Found ('skillessness') 0 time(s), Railgun performance: 2149KB/clock
Found ('I should have known') 1 time(s), Railgun performance: 2126KB/clock
Found ('human consciousness') 519 time(s), Railgun performance: 2557KB/clock
Railgun 8x2 i.e. average performance: 1085KB/clock
*/
/*
PUBLIC _Railgun_Quadruplet
; Function compile flags: /Ogtpy
_TEXT SEGMENT
tv701 = -1056 ; size = 4
tv544 = -1056 ; size = 4
$T5950 = -1056 ; size = 4
_Quadruplet3rd$ = -1052 ; size = 4
_countSTATIC$ = -1052 ; size = 4
_pbPattern$GSCopy$ = -1048 ; size = 4
tv698 = -1044 ; size = 4
_Quadruplet2nd$ = -1044 ; size = 4
tv569 = -1040 ; size = 4
_ulHashPattern$ = -1040 ; size = 4
tv392 = -1036 ; size = 4
_Quadruplet4th$ = -1036 ; size = 4
_pbTargetMax$ = -1032 ; size = 4
_bm_bc$ = -1028 ; size = 1024
__$ArrayPad$ = -4 ; size = 4
_pbTarget$ = 8 ; size = 4
_pbPattern$ = 12 ; size = 4
_cbTarget$ = 16 ; size = 4
_cbPattern$ = 20 ; size = 4
_Railgun_Quadruplet PROC
; 1001 : {
00420 81 ec 20 04 00
00 sub esp, 1056 ; 00000420H
00426 a1 00 00 00 00 mov eax, DWORD PTR ___security_cookie
0042b 33 c4 xor eax, esp
0042d 89 84 24 1c 04
00 00 mov DWORD PTR __$ArrayPad$[esp+1056], eax
; 1002 : char * pbTargetMax = pbTarget + cbTarget;
00434 8b 94 24 2c 04
00 00 mov edx, DWORD PTR _cbTarget$[esp+1052]
0043b 53 push ebx
0043c 8b 9c 24 28 04
00 00 mov ebx, DWORD PTR _pbTarget$[esp+1056]
00443 55 push ebp
; 1003 : register unsigned long ulHashPattern;
; 1004 : unsigned long ulHashTarget;
; 1005 : unsigned long count;
; 1006 : unsigned long countSTATIC;
; 1007 : // unsigned long countRemainder;
; 1008 :
; 1009 : /~
; 1010 : const unsigned char SINGLET = *(char *)(pbPattern);
; 1011 : const unsigned long Quadruplet2nd = SINGLET<<8;
; 1012 : const unsigned long Quadruplet3rd = SINGLET<<16;
; 1013 : const unsigned long Quadruplet4th = SINGLET<<24;
; 1014 : ~/
; 1015 : unsigned char SINGLET;
; 1016 : unsigned long Quadruplet2nd;
; 1017 : unsigned long Quadruplet3rd;
; 1018 : unsigned long Quadruplet4th;
; 1019 :
; 1020 : unsigned long AdvanceHopperGrass;
; 1021 :
; 1022 : long i; //BMH needed
; 1023 : int a, j, bm_bc[ASIZE]; //BMH needed
; 1024 : unsigned char ch; //BMH needed
; 1025 : // unsigned char lastch, firstch; //BMH needed
; 1026 :
; 1027 : if (cbPattern > cbTarget)
00444 8b ac 24 38 04
00 00 mov ebp, DWORD PTR _cbPattern$[esp+1060]
0044b 56 push esi
0044c 8b b4 24 34 04
00 00 mov esi, DWORD PTR _pbPattern$[esp+1064]
00453 8d 04 13 lea eax, DWORD PTR [ebx+edx]
00456 89 74 24 14 mov DWORD PTR _pbPattern$GSCopy$[esp+1068], esi
0045a 89 44 24 24 mov DWORD PTR _pbTargetMax$[esp+1068], eax
0045e 3b ea cmp ebp, edx
00460 76 1a jbe SHORT $LN36@Railgun_Qu
; 1028 : return(NULL);
00462 5e pop esi
00463 5d pop ebp
00464 33 c0 xor eax, eax
00466 5b pop ebx
; 1151 : } //if (cbTarget<961)
; 1152 : } //if ( cbPattern<4)
; 1153 : }
00467 8b 8c 24 1c 04
00 00 mov ecx, DWORD PTR __$ArrayPad$[esp+1056]
0046e 33 cc xor ecx, esp
00470 e8 00 00 00 00 call @__security_check_cookie@4
00475 81 c4 20 04 00
00 add esp, 1056 ; 00000420H
0047b c3 ret 0
$LN36@Railgun_Qu:
0047c 57 push edi
; 1029 :
; 1030 : // Doesn't work when cbPattern = 1
; 1031 : // The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
; 1032 : if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
0047d 83 fd 04 cmp ebp, 4
00480 0f 83 8b 00 00
00 jae $LN35@Railgun_Qu
; 1033 : pbTarget = pbTarget+cbPattern;
; 1034 : ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
00486 0f be 0e movsx ecx, BYTE PTR [esi]
00489 0f be 7c 2e ff movsx edi, BYTE PTR [esi+ebp-1]
; 1083 : }
0048e b8 02 00 00 00 mov eax, 2
00493 2b c5 sub eax, ebp
00495 c1 e1 08 shl ecx, 8
00498 89 44 24 1c mov DWORD PTR tv698[esp+1072], eax
0049c b8 01 00 00 00 mov eax, 1
004a1 03 dd add ebx, ebp
004a3 03 f9 add edi, ecx
004a5 2b c5 sub eax, ebp
004a7 89 44 24 10 mov DWORD PTR tv701[esp+1072], eax
004ab eb 03 8d 49 00 npad 5
$LL34@Railgun_Qu:
; 1035 : countSTATIC = cbPattern-2;
; 1036 :
; 1037 : for ( ;; )
; 1038 : {
; 1039 : // The line below gives for 'cbPattern'>=1:
; 1040 : // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
; 1041 : // Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
; 1042 : /~
; 1043 : if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
; 1044 : return((long)(pbTarget-cbPattern));
; 1045 : ~/
; 1046 :
; 1047 : // The fragment below gives for 'cbPattern'>=2:
; 1048 : // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
; 1049 : // Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
; 1050 :
; 1051 : if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
004b0 0f be 54 18 ff movsx edx, BYTE PTR [eax+ebx-1]
004b5 0f be 4b ff movsx ecx, BYTE PTR [ebx-1]
004b9 c1 e2 08 shl edx, 8
004bc 03 d1 add edx, ecx
004be 3b fa cmp edi, edx
004c0 75 2d jne SHORT $LN64@Railgun_Qu
; 1052 : count = countSTATIC;
004c2 8d 45 fe lea eax, DWORD PTR [ebp-2]
; 1053 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
004c5 85 c0 test eax, eax
004c7 74 14 je SHORT $LN58@Railgun_Qu
; 1052 : count = countSTATIC;
004c9 8b 54 24 1c mov edx, DWORD PTR tv698[esp+1072]
004cd 46 inc esi
004ce 8d 4c 1a ff lea ecx, DWORD PTR [edx+ebx-1]
$LL31@Railgun_Qu:
; 1053 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
004d2 8a 16 mov dl, BYTE PTR [esi]
004d4 3a 11 cmp dl, BYTE PTR [ecx]
004d6 75 0b jne SHORT $LN30@Railgun_Qu
; 1054 : count--;
004d8 41 inc ecx
004d9 46 inc esi
004da 48 dec eax
004db 75 f5 jne SHORT $LL31@Railgun_Qu
$LN58@Railgun_Qu:
; 1055 : }
; 1056 : if ( count == 0) return((pbTarget-cbPattern));
004dd 8b c3 mov eax, ebx
004df 2b c5 sub eax, ebp
; 1082 : return(NULL);
004e1 eb 15 jmp SHORT $LN65@Railgun_Qu
$LN30@Railgun_Qu:
; 1055 : }
; 1056 : if ( count == 0) return((pbTarget-cbPattern));
004e3 85 c0 test eax, eax
004e5 74 f6 je SHORT $LN58@Railgun_Qu
004e7 8b 74 24 18 mov esi, DWORD PTR _pbPattern$GSCopy$[esp+1072]
004eb 8b 44 24 10 mov eax, DWORD PTR tv701[esp+1072]
$LN64@Railgun_Qu:
; 1057 : }
; 1058 :
; 1059 : // The fragment below gives for 'cbPattern'>=2:
; 1060 : // Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
; 1061 : // Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
; 1062 : /~
; 1063 : if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
; 1064 : count = countSTATIC>>2;
; 1065 : countRemainder = countSTATIC % 4;
; 1066 :
; 1067 : while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
; 1068 : count--;
; 1069 : }
; 1070 : //if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
; 1071 : while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
; 1072 : countRemainder--;
; 1073 : }
; 1074 : //if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
; 1075 : if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
; 1076 : //}
; 1077 : }
; 1078 : ~/
; 1079 :
; 1080 : pbTarget++;
004ef 43 inc ebx
; 1081 : if (pbTarget > pbTargetMax)
004f0 3b 5c 24 28 cmp ebx, DWORD PTR _pbTargetMax$[esp+1072]
004f4 76 ba jbe SHORT $LL34@Railgun_Qu
$LN5@Railgun_Qu:
; 1149 : }
; 1150 : return(NULL);
004f6 33 c0 xor eax, eax
$LN65@Railgun_Qu:
; 1151 : } //if (cbTarget<961)
; 1152 : } //if ( cbPattern<4)
; 1153 : }
004f8 8b 8c 24 2c 04
00 00 mov ecx, DWORD PTR __$ArrayPad$[esp+1072]
004ff 5f pop edi
00500 5e pop esi
00501 5d pop ebp
00502 5b pop ebx
00503 33 cc xor ecx, esp
00505 e8 00 00 00 00 call @__security_check_cookie@4
0050a 81 c4 20 04 00
00 add esp, 1056 ; 00000420H
00510 c3 ret 0
$LN35@Railgun_Qu:
; 1084 : } else { //if ( cbPattern<4)
; 1085 : if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
00511 81 fa c1 03 00
00 cmp edx, 961 ; 000003c1H
00517 0f 83 dd 00 00
00 jae $LN26@Railgun_Qu
; 1086 : {
; 1087 : pbTarget = pbTarget+cbPattern;
; 1088 : ulHashPattern = *(unsigned long *)(pbPattern);
0051d 8b 16 mov edx, DWORD PTR [esi]
; 1089 : countSTATIC = cbPattern-1;
; 1090 :
; 1091 : //SINGLET = *(char *)(pbPattern);
; 1092 : SINGLET = ulHashPattern & 0xFF;
; 1093 : Quadruplet2nd = SINGLET<<8;
0051f 0f b6 c2 movzx eax, dl
00522 8b c8 mov ecx, eax
00524 c1 e1 08 shl ecx, 8
00527 89 4c 24 1c mov DWORD PTR _Quadruplet2nd$[esp+1072], ecx
; 1094 : Quadruplet3rd = SINGLET<<16;
0052b 8b c8 mov ecx, eax
0052d 03 dd add ebx, ebp
0052f c1 e1 10 shl ecx, 16 ; 00000010H
00532 89 44 24 10 mov DWORD PTR tv544[esp+1072], eax
; 1095 : Quadruplet4th = SINGLET<<24;
00536 c1 e0 18 shl eax, 24 ; 00000018H
00539 89 9c 24 34 04
00 00 mov DWORD PTR _pbTarget$[esp+1068], ebx
00540 89 54 24 20 mov DWORD PTR _ulHashPattern$[esp+1072], edx
00544 89 4c 24 14 mov DWORD PTR _Quadruplet3rd$[esp+1072], ecx
00548 89 44 24 24 mov DWORD PTR _Quadruplet4th$[esp+1072], eax
0054c 8d 64 24 00 npad 4
$LL25@Railgun_Qu:
; 1096 :
; 1097 : for ( ;; )
; 1098 : {
; 1099 : AdvanceHopperGrass = 0;
; 1100 : ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);
00550 8b cb mov ecx, ebx
00552 2b cd sub ecx, ebp
00554 8b 01 mov eax, DWORD PTR [ecx]
00556 33 f6 xor esi, esi
; 1101 :
; 1102 : if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
00558 3b d0 cmp edx, eax
0055a 75 4a jne SHORT $LN23@Railgun_Qu
; 1103 : count = countSTATIC;
0055c 8d 45 ff lea eax, DWORD PTR [ebp-1]
; 1104 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
0055f 85 c0 test eax, eax
00561 0f 84 76 ff ff
ff je $LN58@Railgun_Qu
; 1103 : count = countSTATIC;
00567 8b 54 24 18 mov edx, DWORD PTR _pbPattern$GSCopy$[esp+1072]
0056b 42 inc edx
0056c 8d 79 01 lea edi, DWORD PTR [ecx+1]
0056f 90 npad 1
$LL22@Railgun_Qu:
; 1104 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
00570 8a 0f mov cl, BYTE PTR [edi]
00572 38 0a cmp BYTE PTR [edx], cl
00574 75 26 jne SHORT $LN21@Railgun_Qu
; 1105 : if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
00576 8d 1c 06 lea ebx, DWORD PTR [esi+eax]
00579 8d 4d ff lea ecx, DWORD PTR [ebp-1]
0057c 3b cb cmp ecx, ebx
0057e 75 0a jne SHORT $LN62@Railgun_Qu
00580 0f be 0f movsx ecx, BYTE PTR [edi]
00583 39 4c 24 10 cmp DWORD PTR tv544[esp+1072], ecx
00587 74 01 je SHORT $LN62@Railgun_Qu
00589 46 inc esi
$LN62@Railgun_Qu:
; 1104 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
0058a 8b 9c 24 34 04
00 00 mov ebx, DWORD PTR _pbTarget$[esp+1068]
; 1106 : count--;
00591 42 inc edx
00592 47 inc edi
00593 48 dec eax
00594 0f 84 43 ff ff
ff je $LN58@Railgun_Qu
; 1104 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
0059a eb d4 jmp SHORT $LL22@Railgun_Qu
$LN21@Railgun_Qu:
; 1107 : }
; 1108 : if ( count == 0) return((pbTarget-cbPattern));
0059c 85 c0 test eax, eax
0059e 0f 84 39 ff ff
ff je $LN58@Railgun_Qu
; 1109 : } else { // The goal here: to avoid memory accesses by stressing the registers.
005a4 eb 36 jmp SHORT $LN15@Railgun_Qu
$LN23@Railgun_Qu:
; 1110 : if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
005a6 8b d0 mov edx, eax
005a8 81 e2 00 ff 00
00 and edx, 65280 ; 0000ff00H
005ae 39 54 24 1c cmp DWORD PTR _Quadruplet2nd$[esp+1072], edx
005b2 74 28 je SHORT $LN15@Railgun_Qu
; 1111 : AdvanceHopperGrass++;
; 1112 : if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
005b4 8b c8 mov ecx, eax
005b6 81 e1 00 00 ff
00 and ecx, 16711680 ; 00ff0000H
005bc be 01 00 00 00 mov esi, 1
005c1 39 4c 24 14 cmp DWORD PTR _Quadruplet3rd$[esp+1072], ecx
005c5 74 15 je SHORT $LN15@Railgun_Qu
; 1113 : AdvanceHopperGrass++;
; 1114 : if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;;
005c7 25 00 00 00 ff and eax, -16777216 ; ff000000H
005cc be 02 00 00 00 mov esi, 2
005d1 39 44 24 24 cmp DWORD PTR _Quadruplet4th$[esp+1072], eax
005d5 74 05 je SHORT $LN15@Railgun_Qu
005d7 be 03 00 00 00 mov esi, 3
$LN15@Railgun_Qu:
; 1115 : }
; 1116 : }
; 1117 : }
; 1118 :
; 1119 : AdvanceHopperGrass++;
; 1120 :
; 1121 : pbTarget = pbTarget + AdvanceHopperGrass;
005dc 8d 5c 33 01 lea ebx, DWORD PTR [ebx+esi+1]
005e0 89 9c 24 34 04
00 00 mov DWORD PTR _pbTarget$[esp+1068], ebx
; 1122 : if (pbTarget > pbTargetMax)
005e7 3b 5c 24 28 cmp ebx, DWORD PTR _pbTargetMax$[esp+1072]
005eb 0f 87 05 ff ff
ff ja $LN5@Railgun_Qu
; 1123 : return(NULL);
; 1124 : }
005f1 8b 54 24 20 mov edx, DWORD PTR _ulHashPattern$[esp+1072]
005f5 e9 56 ff ff ff jmp $LL25@Railgun_Qu
$LN26@Railgun_Qu:
; 1125 : } else { //if (cbTarget<961)
; 1126 : countSTATIC = cbPattern-2;
005fa 8d 45 fe lea eax, DWORD PTR [ebp-2]
005fd 89 44 24 14 mov DWORD PTR _countSTATIC$[esp+1072], eax
; 1127 : /~ Preprocessing ~/
; 1128 : for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
00601 8b c5 mov eax, ebp
00603 b9 00 01 00 00 mov ecx, 256 ; 00000100H
00608 8d 7c 24 2c lea edi, DWORD PTR _bm_bc$[esp+1072]
0060c f3 ab rep stosd
; 1129 : for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
0060e 8d 4d ff lea ecx, DWORD PTR [ebp-1]
00611 33 c0 xor eax, eax
00613 85 c9 test ecx, ecx
00615 74 1a je SHORT $LN7@Railgun_Qu
00617 8d 7d ff lea edi, DWORD PTR [ebp-1]
0061a 8d 9b 00 00 00
00 npad 6
$LL9@Railgun_Qu:
00620 0f be 0c 06 movsx ecx, BYTE PTR [esi+eax]
00624 89 7c 8c 2c mov DWORD PTR _bm_bc$[esp+ecx*4+1072], edi
00628 40 inc eax
00629 8d 4d ff lea ecx, DWORD PTR [ebp-1]
0062c 4f dec edi
0062d 3b c1 cmp eax, ecx
0062f 72 ef jb SHORT $LL9@Railgun_Qu
$LN7@Railgun_Qu:
; 1130 :
; 1131 : /~ Searching ~/
; 1132 : //lastch=pbPattern[cbPattern-1];
; 1133 : //firstch=pbPattern[0];
; 1134 : i=0;
; 1135 : while (i <= cbTarget-cbPattern) {
00631 0f be 44 2e ff movsx eax, BYTE PTR [esi+ebp-1]
00636 33 ff xor edi, edi
00638 2b d5 sub edx, ebp
0063a 89 54 24 10 mov DWORD PTR $T5950[esp+1072], edx
0063e 8d 54 2b ff lea edx, DWORD PTR [ebx+ebp-1]
00642 89 54 24 20 mov DWORD PTR tv569[esp+1072], edx
00646 89 44 24 24 mov DWORD PTR tv392[esp+1072], eax
0064a 8d 9b 00 00 00
00 npad 6
$LL6@Railgun_Qu:
; 1136 : ch=pbTarget[i+cbPattern-1];
; 1137 : //if (ch ==lastch)
; 1138 : //if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
; 1139 : //if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
; 1140 : if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0])
00650 8b 4c 24 20 mov ecx, DWORD PTR tv569[esp+1072]
00654 0f b6 2c 39 movzx ebp, BYTE PTR [ecx+edi]
00658 3b 6c 24 24 cmp ebp, DWORD PTR tv392[esp+1072]
0065c 75 2f jne SHORT $LN1@Railgun_Qu
0065e 8a 14 1f mov dl, BYTE PTR [edi+ebx]
00661 3a 16 cmp dl, BYTE PTR [esi]
00663 75 28 jne SHORT $LN1@Railgun_Qu
; 1141 : {
; 1142 : count = countSTATIC;
00665 8b 44 24 14 mov eax, DWORD PTR _countSTATIC$[esp+1072]
; 1143 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) {
00669 85 c0 test eax, eax
0066b 74 10 je SHORT $LN51@Railgun_Qu
; 1141 : {
; 1142 : count = countSTATIC;
0066d 46 inc esi
0066e 8d 4c 1f 01 lea ecx, DWORD PTR [edi+ebx+1]
$LL3@Railgun_Qu:
; 1143 : while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) {
00672 8a 16 mov dl, BYTE PTR [esi]
00674 3a 11 cmp dl, BYTE PTR [ecx]
00676 75 0d jne SHORT $LN2@Railgun_Qu
; 1144 : count--;
00678 41 inc ecx
00679 46 inc esi
0067a 48 dec eax
0067b 75 f5 jne SHORT $LL3@Railgun_Qu
$LN51@Railgun_Qu:
; 1145 : }
; 1146 : if ( count == 0) return(pbTarget+i);
0067d 8d 04 1f lea eax, DWORD PTR [edi+ebx]
00680 e9 73 fe ff ff jmp $LN65@Railgun_Qu
$LN2@Railgun_Qu:
00685 85 c0 test eax, eax
00687 74 f4 je SHORT $LN51@Railgun_Qu
00689 8b 74 24 18 mov esi, DWORD PTR _pbPattern$GSCopy$[esp+1072]
$LN1@Railgun_Qu:
; 1147 : }
; 1148 : i+=bm_bc[ch];
0068d 03 7c ac 2c add edi, DWORD PTR _bm_bc$[esp+ebp*4+1072]
00691 3b 7c 24 10 cmp edi, DWORD PTR $T5950[esp+1072]
00695 76 b9 jbe SHORT $LL6@Railgun_Qu
; 1130 :
; 1131 : /~ Searching ~/
; 1132 : //lastch=pbPattern[cbPattern-1];
; 1133 : //firstch=pbPattern[0];
; 1134 : i=0;
; 1135 : while (i <= cbTarget-cbPattern) {
00697 e9 5a fe ff ff jmp $LN5@Railgun_Qu
_Railgun_Quadruplet ENDP
*/
/*
On Intel T3400 CPU: IntelC 11.1 mashes MicrosoftC 13.10.3077 with ((1072-681)/681)*100=57%, le-le.
strstr_SHORT-SHOWDOWN_Microsoft.exe:
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun performance: 300KB/clock
Found ('to') 1076629 time(s), Railgun performance: 300KB/clock
Found ('TDK') 0 time(s), Railgun performance: 496KB/clock
Found ('the') 2114180 time(s), Railgun performance: 403KB/clock
Found ('fast') 5945 time(s), Railgun performance: 585KB/clock
Found ('easy') 5191 time(s), Railgun performance: 614KB/clock
Found ('grmbl') 0 time(s), Railgun performance: 805KB/clock
Found ('email') 1 time(s), Railgun performance: 716KB/clock
Found ('pasting') 2 time(s), Railgun performance: 990KB/clock
Found ('amazing') 323 time(s), Railgun performance: 990KB/clock
Found ('underdog') 4 time(s), Railgun performance: 1069KB/clock
Found ('superdog') 0 time(s), Railgun performance: 1167KB/clock
Found ('participants') 147 time(s), Railgun performance: 1433KB/clock
Found ('skillessness') 0 time(s), Railgun performance: 1422KB/clock
Found ('I should have known') 1 time(s), Railgun performance: 1433KB/clock
Found ('human consciousness') 519 time(s), Railgun performance: 1820KB/clock
Railgun 8x2 i.e. average performance: 681KB/clock
strstr_SHORT-SHOWDOWN_Intel.exe:
Doing Search for 8x2 Patterns into String(206908949bytes) as-one-line ...
Found ('an') 1987797 time(s), Railgun performance: 460KB/clock
Found ('to') 1076629 time(s), Railgun performance: 477KB/clock
Found ('TDK') 0 time(s), Railgun performance: 859KB/clock
Found ('the') 2114180 time(s), Railgun performance: 585KB/clock
Found ('fast') 5945 time(s), Railgun performance: 918KB/clock
Found ('easy') 5191 time(s), Railgun performance: 990KB/clock
Found ('grmbl') 0 time(s), Railgun performance: 1167KB/clock
Found ('email') 1 time(s), Railgun performance: 1167KB/clock
Found ('pasting') 2 time(s), Railgun performance: 1603KB/clock
Found ('amazing') 323 time(s), Railgun performance: 1603KB/clock
Found ('underdog') 4 time(s), Railgun performance: 1603KB/clock
Found ('superdog') 0 time(s), Railgun performance: 1836KB/clock
Found ('participants') 147 time(s), Railgun performance: 2126KB/clock
Found ('skillessness') 0 time(s), Railgun performance: 2149KB/clock
Found ('I should have known') 1 time(s), Railgun performance: 2126KB/clock
Found ('human consciousness') 519 time(s), Railgun performance: 2557KB/clock
Railgun 8x2 i.e. average performance: 1072KB/clock
*/
// Revision 2:
/*
DANNII MINOGUE:
Where do we go now?
I don't know
Innocence over
Fading fast
...
You're still promising perfection, perfection
With empty words
With empty words
With empty words
With empty words
And it's hard to break a habit
You're lost inside it
...
A moment of coldness
Cuts through me (cuts through me)
I've tried to remember
Why I don't leave (I don't leave)
And you're the cause of my confusion
Closing down the way I feel
How come I don't see so clearly, so clearly
...
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#ifndef NULL
#define NULL ((void*)0)
#endif
clock_t clocks1, clocks2;
clock_t clocks3, clocks4;
double TotalRoughSearchTime = 0;
long Railgunhits=0;
#define ASIZE 256
// ### Boyer-Moore-Horspool algorithm [
long HORSPOOL(y, x, n, m)
char *y, *x;
long n;
int m;
{
long i;
int a, j, bm_bc[ASIZE];
unsigned char ch, lastch;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=m;
for (j=0; j < m-1; j++) bm_bc[x[j]]=m-j-1;
/* Searching */
lastch=x[m-1];
i=0;
while (i <= n-m) {
ch=y[i+m-1];
if (ch ==lastch)
//if (memcmp(&y[i],x,m-1) == 0) OUTPUT(i);
if (memcmp(&y[i],x,m-1) == 0) return(i);
i+=bm_bc[ch];
}
return(-1);
}
long HORSPOOL_hits(y, x, n, m)
char *y, *x;
long n;
int m;
{
long i;
int a, j, bm_bc[ASIZE];
unsigned char ch, lastch;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=m;
for (j=0; j < m-1; j++) bm_bc[x[j]]=m-j-1;
/* Searching */
lastch=x[m-1];
i=0;
while (i <= n-m) {
ch=y[i+m-1];
if (ch ==lastch)
//if (memcmp(&y[i],x,m-1) == 0) OUTPUT(i);
//if (memcmp(&y[i],x,m-1) == 0) return(i);
if (memcmp(&y[i],x,m-1) == 0) Railgunhits++;
i+=bm_bc[ch];
}
return(-1);
}
long Boyer_Moore_Horspool_Kaze(y, x, n, m) // m>=2
char *y, *x;
long n;
int m;
{
long i;
int a, j, bm_bc[ASIZE];
unsigned char ch;
unsigned char lastch;
unsigned char firstch;
unsigned long count;
unsigned long countSTATIC;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=m;
for (j=0; j < m-1; j++) bm_bc[x[j]]=m-j-1;
/* Searching */
lastch=x[m-1];
firstch=x[0];
i=0;
countSTATIC = m-2;
while (i <= n-m) {
ch=y[i+m-1];
//if (ch ==lastch)
//if (memcmp(&y[i],x,m-1) == 0) OUTPUT(i);
// Below line gives: 315KB/clock
//if (ch ==lastch && y[i] == firstch && memcmp(&y[i],x,m-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
// Below line gives: 328KB/clock
// if (ch == lastch && y[i] == firstch && memcmp(&y[i+1],&x[0+1],m-1-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
if (ch == lastch && y[i] == firstch)
{
count = countSTATIC;
while ( count && *(char *)(x+1+(countSTATIC-count)) == *(char *)(&y[i]+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) return(i);
}
i+=bm_bc[ch];
}
return(-1);
}
// ### Boyer-Moore-Horspool algorithm ]
// ### Brute force 'Dummy' algorithm [
long Brute_Force_Dummy(char *y, char *x, long n, int m) {
long i, j;
/* Searching */
for (i=0; i <= n-m; i++) {
j=0;
while (j < m && y[i+j] == x[j]) j++;
if (j >= m) return(i);
}
return(-1);
}
// ### Brute force 'Dummy' algorithm ]
// ### Karp-Rabin algorithm [
#define REHASH(a, b, h) ((((h) - (a)*d) << 1) + (b))
long Karp_Rabin(char *y, char *x, long n, int m) {
int d, hx, hy, i, j;
/* Preprocessing */
/* computes d = 2^(m-1) with
the left-shift operator */
for (d = i = 1; i < m; ++i)
d = (d<<1);
for (hy = hx = i = 0; i < m; ++i) {
hx = ((hx<<1) + x[i]);
hy = ((hy<<1) + y[i]);
}
/* Searching */
j = 0;
while (j <= n-m) {
if (hx == hy && memcmp(x, y + j, m) == 0) return(j);
hy = REHASH(y[j], y[j + m], hy);
++j;
}
return(-1);
}
// ### Karp-Rabin algorithm ]
// ### Karp-Rabin-Kaze algorithm [
char * KarpRabinKaze (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
unsigned int i;
char * pbTargetMax = pbTarget + cbTarget;
char * pbPatternMax = pbPattern + cbPattern;
unsigned long ulBaseToPowerMod = 1;
register unsigned long ulHashPattern = 0;
unsigned long ulHashTarget = 0;
long hits = 0;
//unsigned long count;
//char * buf1;
//char * buf2;
if (cbPattern > cbTarget)
return(NULL);
// Compute the power of the left most character in base ulBase
//for (i = 1; i < cbPattern; i++) ulBaseToPowerMod = (ulBase * ulBaseToPowerMod);
// Calculate the hash function for the src (and the first dst)
while (pbPattern < pbPatternMax)
{
// Below lines give 366KB/clock for 'underdog':
//ulHashPattern = (ulHashPattern*ulBase + *pbPattern);
//ulHashTarget = (ulHashTarget*ulBase + *pbTarget);
pbPattern++;
pbTarget++;
}
// Below lines give 436KB/clock for 'underdog' + requirement pattern to be 4 chars min.:
//ulHashPattern = ( (*(long *)(pbPattern-cbPattern)) & 0xffffff00 ) + *(pbPattern-1);
//ulHashTarget = ( (*(long *)(pbTarget-cbPattern)) & 0xffffff00 ) + *(pbTarget-1);
// Below lines give 482KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashPattern = ( (*(unsigned short *)(pbPattern-cbPattern)) | *(pbPattern-1) );
//ulHashTarget = ( (*(unsigned short *)(pbTarget-cbPattern)) | *(pbTarget-1) );
// Below lines give 482KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashPattern = ( (*(unsigned short *)(pbPattern-cbPattern)) & 0xff00 ) + *(pbPattern-1);
//ulHashTarget = ( (*(unsigned short *)(pbTarget-cbPattern)) & 0xff00 ) + *(pbTarget-1);
// Below lines give 605KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashPattern = ( (*(unsigned short *)(pbPattern-cbPattern))<<8 ) + *(pbPattern-1);
//ulHashTarget = ( (*(unsigned short *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1);
// Below lines give 668KB/clock for 'underdog':
ulHashPattern = ( (*(char *)(pbPattern-cbPattern))<<8 ) + *(pbPattern-1);
ulHashTarget = ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1);
// Dynamically produce hash values for the string as we go
for ( ;; )
{
if ( (ulHashPattern == ulHashTarget) && !memcmp(pbPattern-cbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
// if ( ulHashPattern == ulHashTarget ) {
//
// count = cbPattern;
// buf1 = pbPattern-cbPattern;
// buf2 = pbTarget-cbPattern;
// while ( --count && *(char *)buf1 == *(char *)buf2 ) {
// buf1 = (char *)buf1 + 1;
// buf2 = (char *)buf2 + 1;
// }
//
// if ( *((unsigned char *)buf1) - *((unsigned char *)buf2) == 0) hits++;
// }
return((pbTarget-cbPattern));
//hits++;
if (pbTarget == pbTargetMax)
return(NULL);
// Below line gives 482KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashTarget = ( (*(unsigned short *)(pbTarget+1-cbPattern)) | *pbTarget );
// Below line gives 436KB/clock for 'underdog' + requirement pattern to be 4 chars min.:
//ulHashTarget = ( (*(long *)(pbTarget+1-cbPattern)) & 0xffffff00 ) + *pbTarget;
//; Line 696
// movsx esi, BYTE PTR [ebx]
// mov ecx, DWORD PTR [edx+1]
// and ecx, -256 ; ffffff00H
// add ecx, esi
// Below line gives 482KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashTarget = ( (*(unsigned short *)(pbTarget+1-cbPattern)) & 0xff00 ) + *pbTarget;
//; Line 691
// movsx esi, BYTE PTR [ebx]
// xor ecx, ecx
// mov cx, WORD PTR [edx+1]
// and ecx, 65280 ; 0000ff00H
// add ecx, esi
// Below line gives 605KB/clock for 'underdog' + requirement pattern to be 2 chars min.:
//ulHashTarget = ( (*(unsigned short *)(pbTarget+1-cbPattern))<<8 ) + *pbTarget;
// Below line gives 668KB/clock for 'underdog':
ulHashTarget = ( (*(char *)(pbTarget+1-cbPattern))<<8 ) + *pbTarget;
//; Line 718
// movsx ecx, BYTE PTR [eax+1]
// movsx edx, BYTE PTR [ebp]
// shl ecx, 8
// add ecx, edx
// Below line gives 366KB/clock for 'underdog':
//ulHashTarget = (ulHashTarget - *(pbTarget-cbPattern)*ulBaseToPowerMod)*ulBase + *pbTarget;
pbTarget++;
}
}
// ### Karp-Rabin-Kaze algorithm ]
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
unsigned long count;
unsigned long countSTATIC;
// unsigned long countRemainder;
/*
const unsigned char SINGLET = *(char *)(pbPattern);
const unsigned long Quadruplet2nd = SINGLET<<8;
const unsigned long Quadruplet3rd = SINGLET<<16;
const unsigned long Quadruplet4th = SINGLET<<24;
*/
unsigned char SINGLET;
unsigned long Quadruplet2nd;
unsigned long Quadruplet3rd;
unsigned long Quadruplet4th;
unsigned long AdvanceHopperGrass;
long i; //BMH needed
int a, j, bm_bc[ASIZE]; //BMH needed
unsigned char ch; //BMH needed
// unsigned char lastch, firstch; //BMH needed
if (cbPattern > cbTarget)
return(NULL);
// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
// countSTATIC = cbPattern-2;
if ( cbPattern==3) {
for ( ;; )
{
if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) return((pbTarget-3));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else {
}
for ( ;; )
{
// The line below gives for 'cbPattern'>=1:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
// Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((long)(pbTarget-cbPattern));
*/
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
// Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
/*
//For 2 and 3 [
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
// count = countSTATIC;
count = cbPattern-2;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/
if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
return((pbTarget-2));
if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC>>2;
countRemainder = countSTATIC % 4;
while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
count--;
}
//if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
countRemainder--;
}
//if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
//}
}
*/
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
pbTarget = pbTarget+cbPattern;
ulHashPattern = *(unsigned long *)(pbPattern);
// countSTATIC = cbPattern-1;
//SINGLET = *(char *)(pbPattern);
SINGLET = ulHashPattern & 0xFF;
Quadruplet2nd = SINGLET<<8;
Quadruplet3rd = SINGLET<<16;
Quadruplet4th = SINGLET<<24;
for ( ;; )
{
AdvanceHopperGrass = 0;
ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);
if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
// count = countSTATIC;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
// if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
// count--;
// }
count = cbPattern-1;
while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
} else { // The goal here: to avoid memory accesses by stressing the registers.
if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
AdvanceHopperGrass++;
if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
AdvanceHopperGrass++;
if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
}
}
}
AdvanceHopperGrass++;
pbTarget = pbTarget + AdvanceHopperGrass;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if (cbTarget<961)
countSTATIC = cbPattern-2;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
/* Searching */
//lastch=pbPattern[cbPattern-1];
//firstch=pbPattern[0];
i=0;
while (i <= cbTarget-cbPattern) {
ch=pbTarget[i+cbPattern-1];
//if (ch ==lastch)
//if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
//if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0])
{
count = countSTATIC;
while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) return(pbTarget+i);
}
i+=bm_bc[ch];
}
return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_6pp (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
//unsigned long count; //r.6+
signed long count;
//unsigned long countSTATIC; //r.6+
signed long countSTATIC;
// unsigned long countRemainder;
/*
const unsigned char SINGLET = *(char *)(pbPattern);
const unsigned long Quadruplet2nd = SINGLET<<8;
const unsigned long Quadruplet3rd = SINGLET<<16;
const unsigned long Quadruplet4th = SINGLET<<24;
*/
unsigned char SINGLET;
unsigned long Quadruplet2nd;
unsigned long Quadruplet3rd;
unsigned long Quadruplet4th;
unsigned long AdvanceHopperGrass;
long i; //BMH needed
int a, j, bm_bc[ASIZE]; //BMH needed
unsigned char ch; //BMH needed
// unsigned char lastch, firstch; //BMH needed
if (cbPattern > cbTarget)
return(NULL);
// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
// countSTATIC = cbPattern-2;
if ( cbPattern==3) {
for ( ;; )
{
if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) return((pbTarget-3));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else {
}
for ( ;; )
{
// The line below gives for 'cbPattern'>=1:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
// Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((long)(pbTarget-cbPattern));
*/
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
// Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
/*
//For 2 and 3 [
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
// count = countSTATIC;
count = cbPattern-2;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/
if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
return((pbTarget-2));
if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC>>2;
countRemainder = countSTATIC % 4;
while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
count--;
}
//if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
countRemainder--;
}
//if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
//}
}
*/
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
pbTarget = pbTarget+cbPattern;
ulHashPattern = *(unsigned long *)(pbPattern);
// countSTATIC = cbPattern-1;
//SINGLET = *(char *)(pbPattern);
SINGLET = ulHashPattern & 0xFF;
Quadruplet2nd = SINGLET<<8;
Quadruplet3rd = SINGLET<<16;
Quadruplet4th = SINGLET<<24;
for ( ;; )
{
AdvanceHopperGrass = 0;
ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);
if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
// count = countSTATIC;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
// if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
// count--;
// }
count = cbPattern-1;
while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
} else { // The goal here: to avoid memory accesses by stressing the registers.
if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
AdvanceHopperGrass++;
if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
AdvanceHopperGrass++;
if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
}
}
}
AdvanceHopperGrass++;
pbTarget = pbTarget + AdvanceHopperGrass;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if (cbTarget<961)
//countSTATIC = cbPattern-2; //r.6+
//countSTATIC = cbPattern-2-3;
countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfittable in code cache lines?!
ulHashPattern = *(unsigned long *)(pbPattern);
// Next line fixes the BUG from r.6++: but with awful speed degradation! So the bug is fixed in the definitions by setting 'countSTATIC = cbPattern-2-2;', bug appears only for patterns with lengths of 4, The setback is one unnecessary comparison for 5bytes patterns, stupidly such setback exists (from before) for 4bytes as well.
//if (countSTATIC<0) countSTATIC=0;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
/* Searching */
//lastch=pbPattern[cbPattern-1];
//firstch=pbPattern[0];
i=0;
while (i <= cbTarget-cbPattern) {
ch=pbTarget[i+cbPattern-1];
//if (ch ==lastch)
//if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
//if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
//if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0]) // r.6+
//if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == *(long *)&pbPattern[0]) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
{
count = countSTATIC;
//while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) { // r.6+
// A BUG (in next line) crushed from r.6++: 'count !=0' becomes 'count >0' in r.6+++ but with awful speed degradation! So the bug is fixed outside the cycles by setting 'countSTATIC' from -1 to 0, bug appears only for patterns with lengths of 4.
while ( count !=0 && *(char *)(pbPattern+1+3+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+3+(countSTATIC-count)) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
count--;
}
if ( count <= 0) return(pbTarget+i);
}
i+=bm_bc[ch];
}
return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun_Quadruplet_6pp_count_hits (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
//unsigned long count; //r.6+
signed long count;
//unsigned long countSTATIC; //r.6+
signed long countSTATIC;
// unsigned long countRemainder;
/*
const unsigned char SINGLET = *(char *)(pbPattern);
const unsigned long Quadruplet2nd = SINGLET<<8;
const unsigned long Quadruplet3rd = SINGLET<<16;
const unsigned long Quadruplet4th = SINGLET<<24;
*/
unsigned char SINGLET;
unsigned long Quadruplet2nd;
unsigned long Quadruplet3rd;
unsigned long Quadruplet4th;
unsigned long AdvanceHopperGrass;
long i; //BMH needed
int a, j, bm_bc[ASIZE]; //BMH needed
unsigned char ch; //BMH needed
// unsigned char lastch, firstch; //BMH needed
if (cbPattern > cbTarget)
return(NULL);
// Doesn't work when cbPattern = 1
// The next IF-fragment works very well with cbPattern>1, OBVIOUSLY IT MUST BE UNROLLED(but crippled with less functionality) SINCE either cbPattern=2 or cbPattern=3!
if ( cbPattern<4) { // This IF makes me unhappy: it slows down from 390KB/clock to 367KB/clock for 'fast' pattern. This fragment(for 2..3 pattern lengths) is needed because I need a function different than strchr but sticking to strstr i.e. lengths above 1 are to be handled.
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
// countSTATIC = cbPattern-2;
if ( cbPattern==3) {
for ( ;; )
{
if ( ulHashPattern == ( (*(char *)(pbTarget-3))<<8 ) + *(pbTarget-1) ) {
if ( *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) Railgunhits++; //return((pbTarget-3));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-2) ) pbTarget++;
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else {
}
for ( ;; )
{
// The line below gives for 'cbPattern'>=1:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
// Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((long)(pbTarget-cbPattern));
*/
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
// Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
/*
//For 2 and 3 [
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
// count = countSTATIC;
count = cbPattern-2;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
while ( count && *(char *)(pbPattern+1) == *(char *)(pbTarget-2) ) { // Crippling i.e. only 2 and 3 chars are allowed!
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
}
if ( (char)(ulHashPattern>>8) != *(pbTarget-cbPattern+1) ) pbTarget++;
//For 2 and 3 ]
*/
if ( ulHashPattern == ( (*(char *)(pbTarget-2))<<8 ) + *(pbTarget-1) )
Railgunhits++; //return((pbTarget-2));
if ( (char)(ulHashPattern>>8) != *(pbTarget-1) ) pbTarget++;
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC>>2;
countRemainder = countSTATIC % 4;
while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
count--;
}
//if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
countRemainder--;
}
//if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
//}
}
*/
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if ( cbPattern<4)
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
pbTarget = pbTarget+cbPattern;
ulHashPattern = *(unsigned long *)(pbPattern);
// countSTATIC = cbPattern-1;
//SINGLET = *(char *)(pbPattern);
SINGLET = ulHashPattern & 0xFF;
Quadruplet2nd = SINGLET<<8;
Quadruplet3rd = SINGLET<<16;
Quadruplet4th = SINGLET<<24;
for ( ;; )
{
AdvanceHopperGrass = 0;
ulHashTarget = *(unsigned long *)(pbTarget-cbPattern);
if ( ulHashPattern == ulHashTarget ) { // Three unnecessary comparisons here, but 'AdvanceHopperGrass' must be calculated - it has a higher priority.
// count = countSTATIC;
// while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
// if ( countSTATIC==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) AdvanceHopperGrass++;
// count--;
// }
count = cbPattern-1;
while ( count && *(char *)(pbPattern+(cbPattern-count)) == *(char *)(pbTarget-count) ) {
if ( cbPattern-1==AdvanceHopperGrass+count && SINGLET != *(char *)(pbTarget-count) ) AdvanceHopperGrass++;
count--;
}
if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
} else { // The goal here: to avoid memory accesses by stressing the registers.
if ( Quadruplet2nd != (ulHashTarget & 0x0000FF00) ) {
AdvanceHopperGrass++;
if ( Quadruplet3rd != (ulHashTarget & 0x00FF0000) ) {
AdvanceHopperGrass++;
if ( Quadruplet4th != (ulHashTarget & 0xFF000000) ) AdvanceHopperGrass++;
}
}
}
AdvanceHopperGrass++;
pbTarget = pbTarget + AdvanceHopperGrass;
if (pbTarget > pbTargetMax)
return(NULL);
}
} else { //if (cbTarget<961)
//countSTATIC = cbPattern-2; //r.6+
//countSTATIC = cbPattern-2-3;
countSTATIC = cbPattern-2-2; // r.6+++ I suppose that the awful degradation comes from 2bytes more (from either 'if (countSTATIC<0) countSTATIC=0;' or 'count >0' fixes) which make the function unfittable in code cache lines?!
ulHashPattern = *(unsigned long *)(pbPattern);
// Next line fixes the BUG from r.6++: but with awful speed degradation! So the bug is fixed in the definitions by setting 'countSTATIC = cbPattern-2-2;', bug appears only for patterns with lengths of 4, The setback is one unnecessary comparison for 5bytes patterns, stupidly such setback exists (from before) for 4bytes as well.
//if (countSTATIC<0) countSTATIC=0;
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
/* Searching */
//lastch=pbPattern[cbPattern-1];
//firstch=pbPattern[0];
i=0;
while (i <= cbTarget-cbPattern) {
ch=pbTarget[i+cbPattern-1];
//if (ch ==lastch)
//if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
//if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
//if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0]) // r.6+
//if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == *(long *)&pbPattern[0]) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
if (ch == pbPattern[cbPattern-1] && *(long *)&pbTarget[i] == ulHashPattern) // No problema here since we have 4[+] long pattern here. Overlapping (1 byte recompared) when length=4, grmbl.
{
count = countSTATIC;
//while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) { // r.6+
// A BUG (in next line) crushed from r.6++: 'count !=0' becomes 'count >0' in r.6+++ but with awful speed degradation! So the bug is fixed outside the cycles by setting 'countSTATIC' from -1 to 0, bug appears only for patterns with lengths of 4.
while ( count !=0 && *(char *)(pbPattern+1+3+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+3+(countSTATIC-count)) ) { // if pattern length is 4 or 5 we have count=-1 and count=0 respectively i.e. no need of comparing in-between chars.
count--;
}
if ( count <= 0) Railgunhits++; //return(pbTarget+i);
}
i+=bm_bc[ch];
}
return(NULL);
} //if (cbTarget<961)
} //if ( cbPattern<4)
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm [
// Caution: For better speed the case 'if (cbPattern==1)' was removed, so Pattern must be longer than 1 char.
char * Railgun (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
unsigned long count;
unsigned long countSTATIC, countRemainder;
long i; //BMH needed
int a, j, bm_bc[ASIZE]; //BMH needed
unsigned char ch; //BMH needed
// unsigned char lastch, firstch; //BMH needed
if (cbPattern > cbTarget)
return(NULL);
countSTATIC = cbPattern-2;
// Doesn't work when cbPattern = 1
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
for ( ;; )
{
// The line below gives for 'cbPattern'>=1:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
// Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((long)(pbTarget-cbPattern));
*/
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
// Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC;
while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) return((pbTarget-cbPattern));
}
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC>>2;
countRemainder = countSTATIC % 4;
while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
count--;
}
//if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
countRemainder--;
}
//if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
//}
}
*/
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
}
else
{
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
/* Searching */
//lastch=pbPattern[cbPattern-1];
//firstch=pbPattern[0];
i=0;
while (i <= cbTarget-cbPattern) {
ch=pbTarget[i+cbPattern-1];
//if (ch ==lastch)
//if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
//if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0])
{
count = countSTATIC;
while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) return(pbTarget+i);
}
i+=bm_bc[ch];
}
return(NULL);
}
}
// ### Mix(2in1) of Karp-Rabin & Boyer-Moore-Horspool algorithm ]
// ### Railgun_totalhits [
char * Railgun_totalhits (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
unsigned long count;
unsigned long countSTATIC, countRemainder;
long i; //BMH needed
int a, j, bm_bc[ASIZE]; //BMH needed
unsigned char ch; //BMH needed
// unsigned char lastch, firstch; //BMH needed
if (cbPattern > cbTarget)
return(NULL);
countSTATIC = cbPattern-2;
// Doesn't work when cbPattern = 1
if (cbTarget<961) // This value is arbitrary(don't know how exactly), it ensures(at least must) better performance than 'Boyer_Moore_Horspool'.
{
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
for ( ;; )
{
// The line below gives for 'cbPattern'>=1:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/543
// Karp_Rabin_Kaze_4_OCTETS performance: 372KB/clock
/*
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((long)(pbTarget-cbPattern));
*/
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/546
// Karp_Rabin_Kaze_4_OCTETS performance: 370KB/clock
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC;
while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) Railgunhits++; //return((pbTarget-cbPattern));
}
// The fragment below gives for 'cbPattern'>=2:
// Karp_Rabin_Kaze_4_OCTETS_hits/Karp_Rabin_Kaze_4_OCTETS_clocks: 4/554
// Karp_Rabin_Kaze_4_OCTETS performance: 364KB/clock
/*
if ( ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1) ) {
count = countSTATIC>>2;
countRemainder = countSTATIC % 4;
while ( count && *(unsigned long *)(pbPattern+1+((count-1)<<2)) == *(unsigned long *)(pbTarget-cbPattern+1+((count-1)<<2)) ) {
count--;
}
//if (count == 0) { // Disastrous degradation only from this line(317KB/clock when 1+2x4+2+1 bytes pattern: 'skillessness'; 312KB/clock when 1+1x4+2+1 bytes pattern: 'underdog'), otherwise 368KB/clock.
while ( countRemainder && *(char *)(pbPattern+1+(countSTATIC-countRemainder)) == *(char *)(pbTarget-cbPattern+1+(countSTATIC-countRemainder)) ) {
countRemainder--;
}
//if ( countRemainder == 0) return((long)(pbTarget-cbPattern));
if ( count+countRemainder == 0) return((long)(pbTarget-cbPattern));
//}
}
*/
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
}
else
{
/* Preprocessing */
for (a=0; a < ASIZE; a++) bm_bc[a]=cbPattern;
for (j=0; j < cbPattern-1; j++) bm_bc[pbPattern[j]]=cbPattern-j-1;
/* Searching */
//lastch=pbPattern[cbPattern-1];
//firstch=pbPattern[0];
i=0;
while (i <= cbTarget-cbPattern) {
ch=pbTarget[i+cbPattern-1];
//if (ch ==lastch)
//if (memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) OUTPUT(i);
//if (ch == lastch && pbTarget[i] == firstch && memcmp(&pbTarget[i],pbPattern,cbPattern-1) == 0) return(i); // Kaze: The idea(to prevent execution of slower 'memcmp') is borrowed from Karp-Rabin i.e. to perform a slower check only when the target "looks like".
if (ch == pbPattern[cbPattern-1] && pbTarget[i] == pbPattern[0])
{
count = countSTATIC;
while ( count && *(char *)(pbPattern+1+(countSTATIC-count)) == *(char *)(&pbTarget[i]+1+(countSTATIC-count)) ) {
count--;
}
if ( count == 0) Railgunhits++; //return(pbTarget+i);
}
i+=bm_bc[ch];
}
return(NULL);
}
}
// ### Railgun_totalhits ]
// ### Karp-Rabin-Kaze_BOOSTED algorithm [
char * KarpRabinKaze_BOOSTED (char * pbTarget,
char * pbPattern,
unsigned long cbTarget,
unsigned long cbPattern)
{
char * pbTargetMax = pbTarget + cbTarget;
register unsigned long ulHashPattern;
unsigned long ulHashTarget;
if (cbPattern > cbTarget)
return(NULL);
pbTarget = pbTarget+cbPattern;
ulHashPattern = ( (*(char *)(pbPattern))<<8 ) + *(pbPattern+(cbPattern-1));
for ( ;; )
{
// Kaze: The idea(FAILED) here is to add an additional(second) layer in order to prevent execution of slower hash calculation(i.e. first layer) which(hash) prevents execution of even slower 'memcmp'.
// The line below gives: 314KB/clock
//if ( *pbPattern == *(char *)(pbTarget-cbPattern) && (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
// The line below gives: 370KB/clock
if ( (ulHashPattern == ( (*(char *)(pbTarget-cbPattern))<<8 ) + *(pbTarget-1)) && !memcmp(pbPattern, pbTarget-cbPattern, (unsigned int)cbPattern) )
return((pbTarget-cbPattern));
pbTarget++;
if (pbTarget > pbTargetMax)
return(NULL);
}
}
// ### Karp-Rabin-Kaze_BOOSTED algorithm ]
char * strstr_Microsoft (
const char * str1,
const char * str2
)
{
char *cp = (char *) str1;
char *s1, *s2;
if ( !*str2 )
return((char *)str1);
while (*cp)
{
s1 = cp;
s2 = (char *) str2;
while ( *s1 && *s2 && !(*s1-*s2) )
s1++, s2++;
if (!*s2)
return(cp);
cp++;
}
return(NULL);
}
char *
strstr_GNU_C_Library (phaystack, pneedle)
const char *phaystack;
const char *pneedle;
{
const unsigned char *haystack, *needle;
char b;
const unsigned char *rneedle;
haystack = (const unsigned char *) phaystack;
if ((b = *(needle = (const unsigned char *) pneedle)))
{
char c;
haystack--; /* possible ANSI violation */
{
char a;
do
if (!(a = *++haystack))
goto ret0;
while (a != b);
}
if (!(c = *++needle))
goto foundneedle;
++needle;
goto jin;
for (;;)
{
{
char a;
if (0)
jin:{
if ((a = *++haystack) == c)
goto crest;
}
else
a = *++haystack;
do
{
for (; a != b; a = *++haystack)
{
if (!a)
goto ret0;
if ((a = *++haystack) == b)
break;
if (!a)
goto ret0;
}
}
while ((a = *++haystack) != c);
}
crest:
{
char a;
{
const unsigned char *rhaystack;
if (*(rhaystack = haystack-- + 1) == (a = *(rneedle = needle)))
do
{
if (!a)
goto foundneedle;
if (*++rhaystack != (a = *++needle))
break;
if (!a)
goto foundneedle;
}
while (*++rhaystack == (a = *++needle));
needle = rneedle; /* took the register-poor aproach */
}
if (!a)
break;
}
}
}
foundneedle:
return (char *) haystack;
ret0:
return 0;
}
int main( int argc, char *argv[] )
{
FILE *fp_inLINE;
int Bozan;
long ThunderwithL, ThunderwithR;
char *Strng;
long Strnglen;
long StrnglenTRAVERSED;
char Pattern[20+2000]; // skillessness=12 human consciousness=19 I should have known=19
// In the East, enlightenment is described as a state of ultimate=62
int Patternlen;
long LinesEncountered=0;
long BruteForceDummyhits=0;
long KarpRabinKazehits=0;
long KarpRabinKaze_BOOSTEDhits=0;
long Karp_Rabin_Kaze_4_OCTETShits=0;
long Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
long KarpRabinhits=0;
long HORSPOOLhits=0;
long HORSPOOL_Kazehits=0;
long strstrMicrosofthits=0;
long strstrGNUCLibraryhits=0;
long Railgun_Quadruplet_6pp_GO=0;
long dummy;
int i, j;
char *DumboBox[8][2] = { "an\0", "to\0",
"TDK\0", "the\0",
"fast\0", "easy\0",
"grmbl\0", "email\0",
"pasting\0", "amazing\0",
"underdog\0", "superdog\0",
"participants\0", "skillessness\0",
"I should have known\0", "human consciousness\0"
};
long FoundIn;
char *FoundInPTR;
printf("strstr_SHORT-SHOWDOWN, revision 6+++, written by Kaze.\n");
Pattern[0]=0x00;
Strng = (char *)malloc( 200*1024*1024 );
if( Strng == NULL )
{ puts( "strstr_SHORT-SHOWDOWN: Needed memory allocation denied!\n" ); return( 1 ); }
if( ( fp_inLINE = fopen( "OSHO.TXT", "rb" ) ) == NULL )
{ printf( "strstr_SHORT-SHOWDOWN: Can't open 'OSHO.TXT' file.\n" ); return( 1 ); }
fseek(fp_inLINE, 0, SEEK_END);
Strnglen = ftell(fp_inLINE);
fseek(fp_inLINE, 0, SEEK_SET);
fread(Strng, 1, Strnglen, fp_inLINE);
if (argc==1) {
printf("Input Pattern(up to 19+2000 chars): "); gets(Pattern); // char * __cdecl gets(char *);
Patternlen = strlen(&Pattern[0]);
}
// Replacing CR with NULL i.e. 13->0
for (ThunderwithL=0; ThunderwithL<Strnglen; ThunderwithL++)
if (Strng[ThunderwithL] == 13) Strng[ThunderwithL] = 0;
ThunderwithL=0;ThunderwithR=0;
if (argc==1) {
// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
dummy = HORSPOOL_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "BM_HORSPOOL_hits/BM_HORSPOOL_clocks: %lu/%lu\n", Railgunhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "BM_HORSPOOL performance: %luKB/clock\n", (Strnglen/((long)(TotalRoughSearchTime)>>4))>>10);
// As one line: ]
// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
FoundInPTR = Railgun_totalhits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_hits/Railgun_clocks: %lu/%lu\n", Railgunhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun performance: %luKB/clock\n", (Strnglen/((long)(TotalRoughSearchTime)>>4))>>10);
// As one line: ]
// As one line: [
Railgunhits=0;
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) as-one-line ...\n", Patternlen, Strnglen);
printf( "WARNING! The next line works with BMH only for pattern 4[+] long, otherwise (for 2 and 3) other searcher takes over!\n");
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], &Pattern[0], Strnglen, Patternlen);
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "Railgun_6pp_hits/Railgun_6pp_clocks: %lu/%lu\n", Railgunhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_6pp performance: %luKB/clock\n", (Strnglen/((long)(TotalRoughSearchTime)>>4))>>10);
// As one line: ]
} //if (argc==1) {
// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
for (i = 0; i<8; i++)
{
Railgunhits=0;
clocks3 = clock();
dummy = HORSPOOL_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
printf( "Found ('%s') %d time(s), BM_HORSPOOL performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
}
Railgunhits=0;
clocks3 = clock();
dummy = HORSPOOL_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
printf( "Found ('%s') %d time(s), BM_HORSPOOL performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
}
}
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "BM_HORSPOOL_hits/BM_HORSPOOL_clocks: %lu/%lu\n", Railgunhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "BM_HORSPOOL 8x2 i.e. average performance: %luKB/clock\n", ((unsigned long long)8*2*Strnglen/((long)(TotalRoughSearchTime)>>4))>>10);
// As one line: ]
// As one line: [
printf( "\nDoing Search for 8x2 Patterns into String(%dbytes) as-one-line ...\n", Strnglen);
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
for (i = 0; i<8; i++)
{
Railgunhits=0;
clocks3 = clock();
FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], DumboBox[i][0], Strnglen, strlen(DumboBox[i][0]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
printf( "Found ('%s') %d time(s), Railgun_6pp performance: %luKB/clock\n", DumboBox[i][0], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
}
Railgunhits=0;
clocks3 = clock();
FoundInPTR = Railgun_Quadruplet_6pp_count_hits(&Strng[0], DumboBox[i][1], Strnglen, strlen(DumboBox[i][1]));
clocks4 = clock();
if (Bozan == (1<<4)-1) { TotalRoughSearchTime = clocks4 - clocks3; TotalRoughSearchTime++;
printf( "Found ('%s') %d time(s), Railgun_6pp performance: %luKB/clock\n", DumboBox[i][1], Railgunhits, (Strnglen/((long)(TotalRoughSearchTime)))>>10);
}
}
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
//printf( "Railgun_6pp_hits/Railgun_6pp_clocks: %lu/%lu\n", Railgunhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_6pp 8x2 i.e. average performance: %luKB/clock\n", ((unsigned long long)8*2*Strnglen/((long)(TotalRoughSearchTime)>>4))>>10);
// As one line: ]
if (argc==1) {
printf( "\nDoing Search for Pattern(%dbytes) into String(%dbytes) line-by-line ...\n", Patternlen, Strnglen);
// 7[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = strstr_Microsoft(&Strng[ThunderwithL], &Pattern[0]);
if ( FoundInPTR != NULL) {strstrMicrosofthits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "strstr_Microsoft_hits/strstr_Microsoft_clocks: %lu/%lu\n", strstrMicrosofthits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "strstr_Microsoft performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 7]
// 8[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = strstr_GNU_C_Library(&Strng[ThunderwithL], &Pattern[0]);
if ( FoundInPTR != 0) {strstrGNUCLibraryhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: %lu/%lu\n", strstrGNUCLibraryhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "strstr_GNU_C_Library performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 8]
// 9[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_hits/Railgun_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 9]
// +[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun_Quadruplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_Quadruplet performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// +]
// Z[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun_Quadruplet_6pp(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Railgun_Quadruplet_6pp_GO++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_6pp_hits/Railgun_Quadruplet_6pp_clocks: %lu/%lu\n", Railgun_Quadruplet_6pp_GO>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_Quadruplet_6pp performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]
// 6[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = KarpRabinKaze_BOOSTED(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {KarpRabinKaze_BOOSTEDhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "KarpRabinKaze_BOOSTED_hits/KarpRabinKaze_BOOSTED_clocks: %lu/%lu\n", KarpRabinKaze_BOOSTEDhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "KarpRabinKaze_BOOSTED performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 6]
// 2[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = KarpRabinKaze(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {KarpRabinKazehits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "KarpRabinKaze_hits/KarpRabinKaze_clocks: %lu/%lu\n", KarpRabinKazehits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "KarpRabinKaze performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 2]
// 3[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundIn = Karp_Rabin(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundIn != -1) {KarpRabinhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+FoundIn;} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Karp_Rabin_hits/Karp_Rabin_clocks: %lu/%lu\n", KarpRabinhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Karp_Rabin performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 3]
// 1[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundIn = Brute_Force_Dummy(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundIn != -1) {BruteForceDummyhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+FoundIn;} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Brute_Force_Dummy_hits/Brute_Force_Dummy_clocks: %lu/%lu\n", BruteForceDummyhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Brute_Force_Dummy performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 1]
// 4[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundIn = HORSPOOL(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundIn != -1) {HORSPOOLhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+FoundIn;} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Boyer-Moore-Horspool_hits/Boyer-Moore-Horspool_clocks: %lu/%lu\n", HORSPOOLhits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Boyer-Moore-Horspool performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 4]
// 5[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundIn = Boyer_Moore_Horspool_Kaze(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundIn != -1) {HORSPOOL_Kazehits++; StrnglenTRAVERSED=StrnglenTRAVERSED+FoundIn;} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Boyer_Moore_Horspool_Kaze_hits/Boyer_Moore_Horspool_Kaze_clocks: %lu/%lu\n", HORSPOOL_Kazehits>>4, (long)(TotalRoughSearchTime)>>4);
printf( "Boyer_Moore_Horspool_Kaze performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 5]
// DUMBO ... [
printf( "\nDUMBO 8x2 ...\n");
for (i = 0; i<8; i++)
{
for (j = 0; j<2; j++)
{
strcpy (Pattern, DumboBox[i][j]);
Patternlen = strlen(&Pattern[0]);
printf( "\nSearching for Pattern('%s',%dbytes) into String(%dbytes) line-by-line ...\n\n", Pattern, Patternlen, Strnglen);
// 7[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
strstrMicrosofthits=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = strstr_Microsoft(&Strng[ThunderwithL], &Pattern[0]);
if ( FoundInPTR != NULL) {strstrMicrosofthits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "strstr_Microsoft_hits/strstr_Microsoft_clocks: %lu/%lu\n", strstrMicrosofthits, (long)(TotalRoughSearchTime)>>4);
printf( "strstr_Microsoft performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 7]
// 8[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
strstrGNUCLibraryhits=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = strstr_GNU_C_Library(&Strng[ThunderwithL], &Pattern[0]);
if ( FoundInPTR != 0) {strstrGNUCLibraryhits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "strstr_GNU_C_Library_hits/strstr_GNU_C_Library_clocks: %lu/%lu\n", strstrGNUCLibraryhits, (long)(TotalRoughSearchTime)>>4);
printf( "strstr_GNU_C_Library performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 8]
// 9[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_hits/Railgun_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// 9]
// +[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun_Quadruplet(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_hits/Railgun_Quadruplet_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_Quadruplet performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// +]
// Z[
clocks1 = clock();
for (Bozan=0; Bozan < (1<<4); Bozan++) // 16 times, at end >>4
{
Karp_Rabin_Kaze_4_OCTETShits_DOUBLET=0;
//Search area is between Strng[0] .. Strng[n-1]
StrnglenTRAVERSED=0; // Only traversed chars i.e. real
ThunderwithL=0;ThunderwithR=0;
for (;;)
{
while (Strng[ThunderwithR] != 10 && ThunderwithR < Strnglen-1) {ThunderwithR++;}
FoundInPTR = Railgun_Quadruplet_6pp(&Strng[ThunderwithL], &Pattern[0], ThunderwithR - ThunderwithL, Patternlen);
if ( FoundInPTR != NULL) {Karp_Rabin_Kaze_4_OCTETShits_DOUBLET++; StrnglenTRAVERSED=StrnglenTRAVERSED+(FoundInPTR-&Strng[ThunderwithL]);} else StrnglenTRAVERSED=StrnglenTRAVERSED+(ThunderwithR - ThunderwithL);
LinesEncountered++;
ThunderwithR++; ThunderwithL=ThunderwithR;
if (ThunderwithR >= Strnglen-1) break;
}
if (Bozan != (1<<4)-1) LinesEncountered=0;
}
clocks2 = clock(); TotalRoughSearchTime = clocks2 - clocks1; TotalRoughSearchTime++;
printf( "LinesEncountered: %lu\n", LinesEncountered);
printf( "Railgun_Quadruplet_6pp_hits/Railgun_Quadruplet_6pp_clocks: %lu/%lu\n", Karp_Rabin_Kaze_4_OCTETShits_DOUBLET, (long)(TotalRoughSearchTime)>>4);
printf( "Railgun_Quadruplet_6pp performance: %luKB/clock\n", (StrnglenTRAVERSED/((long)(TotalRoughSearchTime)>>4))>>10);
printf( "StrnglenTRAVERSED: %lu bytes\n", StrnglenTRAVERSED);
// Z]
}
}
// DUMBO ... ]
} //if (argc==1) {
//exit(1); // Comment it to get on!
return(0);
}