|
|
Comments and Discussions
|
|
 |

|
You might find this interesting for the assembly optimizations
Asm optimized crc32
//Ante, ante.c@runbox.com
|
|
|
|

|
Hey. Nice stuff, found it useful.
I decided to try my hand at the assembler, poked around the link listed by Ante above. My result is below. In my case, I "init" the CRC in a separate call, this routine works a series of buffers as they're received until I reach the end. My table is a global variable, not a static member of the object, but otherwise it's like yours. I noted that on the site Ante links to, they claimed 158 Mbytes per second on a T'bird 1.4 Ghz. On my 1.5Ghz AMD XP I get 275 Mbytes per second, not sure if the T'bird should be that far behind or not. For me, the punch came from loading a quad word at a time, not a byte at a time.
// Table-driven CRC-32 update over `len` bytes at `buf`, continuing from the
// running value stored in this->CurrentCRC and writing the result back there.
// The bulk of the buffer is consumed one 32-bit dword (4 bytes) per loop
// iteration; the 0-3 leftover bytes are then handled one at a time.
//
// Register roles inside the block:
//   ecx = running CRC
//   edi = base address of Crc32Table
//   esi = current read position in buf
//   ebx = table index; only bl is ever written, so the upper 24 bits stay 0
//   edx = dwords remaining in the main loop, then bytes remaining in the tail
//   eax = scratch: 'this', the length, then the dword just loaded
__asm
{
mov eax, this // Load 'this'
mov ecx, [eax]this.CurrentCRC // Load running CRC from 'this'
mov edi, offset Crc32Table // Load the address of the CRC32 table
mov esi, buf // Load buffer pointer
xor ebx, ebx // zero ebx, used to process bytes, forming
// the index into the crc table
mov eax, len // get length in bytes
mov edx, eax
and eax, 3 // calc remainder after division by 4 (tail byte count)
push eax // preserve the remainder for later (popped at crc32tail)
shr edx, 2 // div by 4, calculating total dwords; sets ZF if none
jz crc32tail // fewer than 4 bytes: skip straight to the tail
crc32loop:
mov eax, [esi] // grab a dword (4 bytes) from buf
mov bl, al // form index entry, starting with the lowest byte
// part one of 4 in the dword
xor bl, cl // xor against low byte of current CRC
shr ecx, 8 // shift CRC
xor ecx, [edi + ebx * 4] // xor CRC with the table's entry
// part two of 4 in the dword
mov bl, ah // grab the second byte of the dword
xor bl, cl // xor against low byte of current CRC
shr ecx, 8 // shift CRC
xor ecx, [edi + ebx * 4] // xor CRC with table's entry
shr eax, 16 // bring the upper two bytes down into al/ah
// part three of 4 in the dword
mov bl, al // grab the third byte of the dword
xor bl, cl // xor against low byte of current CRC
shr ecx, 8 // shift CRC
xor ecx, [edi + ebx * 4] // xor CRC with table's entry
// part four of 4 in the dword
mov bl, ah // grab the fourth byte of the dword
xor bl, cl // xor against low byte of current CRC
shr ecx, 8 // shift CRC
xor ecx, [edi + ebx * 4] // xor CRC with table's entry
add esi, 4 // Advance the source pointer one dword
dec edx // counting dwords
jnz crc32loop // if more dwords, loop
crc32tail:
pop edx // retrieve the leftover byte count (len & 3) pushed earlier
cmp edx, 0 // check to see if it's zero
je crc32end
crc32tinyloop:
mov bl, byte ptr [esi] // grab one byte from buf
xor bl, cl // xor against low byte of current crc
shr ecx, 8 // shift crc
xor ecx, [edi + ebx * 4] // xor crc with table's entry
inc esi // increment buf pointer
dec edx // dec count
jnz crc32tinyloop // loop if not zero
crc32end:
mov eax, this
mov [eax]this.CurrentCRC, ecx // write to CurrentCRC
// NOTE(review): the closing brace of this __asm block is not shown in the snippet.
|
|
|
|

|
Hahaha
I've been reading way too much on the Opteron!
Sorry guys.
My post kept referring to quadwords where it should have said dwords.
Same theory, just half the size.
Now, on the Opteron - once I get one - this should really zing right through it.
|
|
|
|

|
#pragma once
#include <iostream>
#include <tchar.h>
#include <windows.h>
----------------------------------------------
<pre>
#include "stdafx.h"
#define POLY 0xEDB88320
#define SEED 0xFFFFFFFF
DWORD CRCTable[256];
int _tmain(int argc, _TCHAR* argv[])
{ DWORD crc32;
__asm ; CRCTable filling.
{ xor ebx, ebx ; bl - CRCTable entry index. Init with 0.
mov ecx, POLY ; Load polynom into ecx for speed
CRCTableLoop: ; Head of CRC table calculation main loop.
mov eax, ebx ; Load index into eax.
xor edx, edx ; edx = 0
shr eax, 1 ; Carry Falg = LSB of eax.
cmovc edx, ecx ; edx = (eax & 1) ? POLY : 0
xor eax, edx ; eax = (eax & 1) ? eax ^ POLY : eax
; The same with other bits
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
xor edx, edx
shr eax, 1
cmovc edx, ecx
xor eax, edx
mov CRCTable[4*ebx], eax ; Fill the current table entry.
inc bl ; Move to the next table entry.
jnz CRCTableLoop ; If index < 256 Then continue CRCTable values
; Else Table is full.
}
printf("CRC32\t\tFileSize\tFileName\n");
for(int i = 1; i < argc; ++i)
{ HANDLE hFile = CreateFile(argv[i], GENERIC_READ, FILE_SHARE_READ, NULL, OPEN_EXISTING, 0, NULL);
if(hFile == INVALID_HANDLE_VALUE)
{ printf("Error opening\t%s\n", argv[i]);
continue;
}
HANDLE hMap = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL);
if(hMap == NULL)
{ CloseHandle(hFile);
printf("Error creating map\t%s\n", argv[i]);
continue;
}
LPVOID pBuffer = MapViewOfFile(hMap, FILE_MAP_READ, 0, 0, 0);
if(pBuffer == NULL)
{ CloseHandle(hMap);
CloseHandle(hFile);
printf("Error MapViewOfFile\t%s\n", argv[i]);
continue;
}
DWORD dwSize = GetFileSize(hFile, NULL);
__asm
{ mov esi, pBuffer ; esi = buffer pointer.
mov ecx, dwSize ; ecs = buffer size.
mov eax, SEED ; Init CRC.
CalcCRC: ; Head of buffer CRC calculation main loop.
movzx ebx, byte ptr [esi] ; bl = next char, other ebx bits = 0
xor bl, al ; /Calculate
shr eax, 8 ; | current
mov ebx, CRCTable[4*ebx] ; \CRC value.
xor eax, ebx ; eax = current CRC value.
inc esi ; Move to the next char.
dec ecx ; Decrement of remaining bytes counter.
jnz CalcCRC ; IF counter > 0 THEN continue buffer CRC calculation.
not eax ; eax ^= 0xFFFFFFFF
mov crc32, eax ; save CRC
}
DWORD t3 = GetTickCount();
printf("0x%-08X\t%-8d\t%s\n", crc32, dwSize, argv[i]);
UnmapViewOfFile(pBuffer);
CloseHandle(hMap);
CloseHandle(hFile);
}
return 0;
}
|
|
|
|

|
Here's a loop I came up with:
#pragma warning(push)
#pragma warning(disable : 4035)
unsigned long Crc32_Asm(char *pBuf, unsigned nBytes,
unsigned *pCrcTbl, unsigned nCrc)
{
__asm {
mov edi,pBuf
mov ecx,nBytes
mov eax,nCrc
mov ebx,pCrcTbl
add edi,ecx
neg ecx
mov edx,eax
again:
and edx,0x000000ff
shr eax,8
movzx esi,byte ptr [edi+ecx]
xor edx,esi
mov esi,[ebx+edx*4]
xor eax,esi
inc ecx
mov edx,eax
jnz again
}
}
#pragma warning(pop)
|
|
|
|
 |
|
|
General News Suggestion Question Bug Answer Joke Rant Admin
|
How to generate a CRC32 based on a file
| Type | Article |
| Licence | |
| First Posted | 17 Dec 2001 |
| Views | 456,653 |
| Bookmarked | 153 times |
|
|