format PE console
include '%fasminc%\win32axp.inc'
ITER equ 64 ; The number of iterations
Julia equ 0
Mandel equ 1
; copyscr dst, src
; Loads the dword at [src] into the low lane of the XMM register dst and
; replicates it into all four lanes (shuffle selector 0 picks lane 0 four times).
macro copyscr dst, src
{
        movss   dst, dword [src]        ; low lane = scalar, upper lanes zeroed by the load
        shufps  dst, dst, 0             ; broadcast lane 0 to lanes 1..3
}
macro startm
{
; Start a timing measurement: store the low 32 bits of the time-stamp
; counter in [b]. CPUID (leaf 0) is executed first, presumably to
; serialise the instruction stream before RDTSC.
; Overwrites eax, ebx, ecx and edx (CPUID and RDTSC outputs).
xor eax, eax
cpuid
rdtsc
mov [b], eax
}
macro endm
{
; Finish a timing measurement started by startm: leaves in eax the low
; 32 bits of the time-stamp counter minus the value saved in [b].
; Only the low dword is used, so the difference is modulo 2^32.
; Overwrites eax, ebx, ecx and edx (CPUID and RDTSC outputs).
xor eax, eax
cpuid
rdtsc
sub eax, [b]
}
; MUL
;-----------------------------------------------------------------------
; JuliaMandelPaintMUL color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a];
;           zero: the pixels are written as (NOT mask) AND maskbw, where
;           mask is the last "inside the radius" compare result.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero, so
; [w] has to be a multiple of 4.
; Variant: three MULPS per iteration (zx*zx, zx*zy, zy*zy), doubling by ADDPS.
; Fix: in this variant the compare mask lives in xmm1 (xmm2 holds 2*zx*zy),
; and xmm1 is overwritten before the loop ends. For color = 0 a copy of the
; mask is therefore kept in xmm3 and that copy is stored. The code generated
; for color <> 0 is unchanged.
;-----------------------------------------------------------------------
macro JuliaMandelPaintMUL color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
        push    ebx esi edi
        mov     edx, [h]                        ; edx = rows remaining
        mov     esi, [a]                        ; esi = colour table
        mov     edi, [bits]                     ; edi = output pointer
; Broadcast the scalar parameters into all four lanes.
if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
if type = Julia
        MOVAPS  dqword[cx1], xmm0
        MOVAPS  dqword[cy1], xmm1
end if
        MOVAPS  dqword[dy1], xmm3
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
; Build the lane offsets 0 | dx2 | dx2 * 2 | dx2 * 3 in xmm0 and add them to LEFT.
        MOVAPS  xmm0, xmm2
        ANDPS   xmm0, dqword[mask1]             ; lane 3 only
        MOVAPS  xmm1, xmm2
        ANDPS   xmm1, dqword[mask2]             ; lanes 2 and 3
        ADDPS   xmm0, xmm1
        ADDPS   xmm0, xmm2
        ANDPS   xmm0, dqword[mask]              ; clear lane 0
        ADDPS   xmm4, xmm0
        MOVAPS  dqword[left1], xmm4             ; x coordinates of the first group in a row
; dx1 = 4 * dx2: the X step for a group of four pixels.
        ADDPS   xmm2, xmm2
        ADDPS   xmm2, xmm2
        MOVAPS  dqword[dx1], xmm2
        MOVAPS  xmm7, dqword[radius]
        JMP     BSTART
; xmm0 = zx  xmm1 = zy  xmm2, xmm3 = temporaries  xmm4 = pixel x (4 lanes)
; xmm5 = pixel y  xmm6 = colour accumulator  xmm7 = 4.0
; eax = tmp  ebx = x counter  ecx = iteration counter  edx = y counter
; edi = bits pointer  esi = colour table
YLOOP:
        MOVAPS  xmm4, dqword[left1]             ; back to the left edge
BSTART:
        mov     ebx, [w]
XLOOP:
        MOVAPS  xmm0, xmm4                      ; z = pixel coordinate
        XORPS   xmm6, xmm6
        MOVAPS  xmm1, xmm5
        mov     ecx, ITER
ILOOP:
        MOVAPS  xmm2, xmm0                      ; xmm2 = zx
        MULPS   xmm0, xmm0                      ; xmm0 = zx^2
        MULPS   xmm2, xmm1                      ; xmm2 = zx*zy
        MULPS   xmm1, xmm1                      ; xmm1 = zy^2
        MOVAPS  xmm3, xmm1                      ; xmm3 = zy^2
        ADDPS   xmm1, xmm0                      ; xmm1 = zx^2 + zy^2
        CMPLEPS xmm1, xmm7                      ; xmm1 = lane mask: still inside the radius
        SUBPS   xmm0, xmm3                      ; xmm0 = zx^2 - zy^2
if color = 0
        MOVAPS  xmm3, xmm1                      ; xmm3 is free now: keep the mask for the store
end if
        ADDPS   xmm2, xmm2                      ; xmm2 = 2*zx*zy
        MOVMSKPS eax, xmm1
        test    eax, eax
        jz      EXIT                            ; every lane has escaped
if color
        ANDPS   xmm1, xmm7                      ; xmm6 += (inside radius) ? 4.0 : 0.0
        ADDPS   xmm6, xmm1
end if
        MOVAPS  xmm1, xmm2
if type = Julia
        ADDPS   xmm1, dqword[cy1]
        ADDPS   xmm0, dqword[cx1]
else if type = Mandel
        ADDPS   xmm1, xmm5
        ADDPS   xmm0, xmm4
end if
        sub     ecx, 1
        jnz     ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table entry.
;21-22 cycles
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi], eax
        SHUFPS  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        SHUFPS  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        SHUFPS  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
else
        ANDNPS  xmm3, dqword[maskbw]            ; (NOT mask) AND maskbw
        MOVAPS  [edi], xmm3                     ; needs a 16-byte aligned output buffer
end if
        add     edi, 16
        ADDPS   xmm4, dqword[dx1]
        sub     ebx, 4
        jnz     XLOOP
        ADDPS   xmm5, dqword[dy1]
        sub     edx, 1
        jnz     YLOOP
        pop     edi esi ebx
}
; FFFF
;-----------------------------------------------------------------------
; JuliaMandelPaintFFFF color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a];
;           zero: the pixels are written as (NOT mask) AND maskbw, where
;           mask is the last "inside the radius" compare result.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero, so
; [w] has to be a multiple of 4.
; Variant: the escape test uses a strict compare (CMPLTPS) and is taken
; before zx^2 - zy^2 is formed.
; Fix: in this variant the compare mask lives in xmm3 (xmm2 holds 2*zx*zy),
; so the color = 0 store now uses xmm3. The code generated for color <> 0
; is unchanged.
;-----------------------------------------------------------------------
macro JuliaMandelPaintFFFF color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
        push    ebx esi edi
        mov     edx, [h]                        ; edx = rows remaining
        mov     esi, [a]                        ; esi = colour table
        mov     edi, [bits]                     ; edi = output pointer
; Broadcast the scalar parameters into all four lanes.
if type = Julia
        copyscr xmm0, cx2
        copyscr xmm1, cy2
end if
        copyscr xmm2, dx2
        copyscr xmm3, dy2
if type = Julia
        MOVAPS  dqword[cx1], xmm0
        MOVAPS  dqword[cy1], xmm1
end if
        MOVAPS  dqword[dy1], xmm3
        copyscr xmm4, LEFT
        copyscr xmm5, TOP
; Build the lane offsets 0 | dx2 | dx2 * 2 | dx2 * 3 in xmm0 and add them to LEFT.
        MOVAPS  xmm0, xmm2
        ANDPS   xmm0, dqword[mask1]             ; lane 3 only
        MOVAPS  xmm1, xmm2
        ANDPS   xmm1, dqword[mask2]             ; lanes 2 and 3
        ADDPS   xmm0, xmm1
        ADDPS   xmm0, xmm2
        ANDPS   xmm0, dqword[mask]              ; clear lane 0
        ADDPS   xmm4, xmm0
        MOVAPS  dqword[left1], xmm4             ; x coordinates of the first group in a row
; dx1 = 4 * dx2: the X step for a group of four pixels.
        ADDPS   xmm2, xmm2
        ADDPS   xmm2, xmm2
        MOVAPS  dqword[dx1], xmm2
        MOVAPS  xmm7, dqword[radius]
        JMP     BSTART
; xmm0 = zx  xmm1 = zy  xmm2, xmm3 = temporaries  xmm4 = pixel x (4 lanes)
; xmm5 = pixel y  xmm6 = colour accumulator  xmm7 = 4.0
; eax = tmp  ebx = x counter  ecx = iteration counter  edx = y counter
; edi = bits pointer  esi = colour table
YLOOP:
        MOVAPS  xmm4, dqword[left1]             ; back to the left edge
BSTART:
        mov     ebx, [w]
XLOOP:
        MOVAPS  xmm0, xmm4                      ; z = pixel coordinate
        XORPS   xmm6, xmm6
        MOVAPS  xmm1, xmm5
        mov     ecx, ITER
ILOOP:
        MOVAPS  xmm2, xmm0                      ; xmm2 = zx
        MULPS   xmm2, xmm1                      ; xmm2 = zx*zy
        MULPS   xmm0, xmm0                      ; xmm0 = zx^2
        MULPS   xmm1, xmm1                      ; xmm1 = zy^2
        addps   xmm2, xmm2                      ; xmm2 = 2*zx*zy
        movaps  xmm3, xmm0
        addps   xmm3, xmm1                      ; xmm3 = zx^2 + zy^2
        cmpltps xmm3, xmm7                      ; xmm3 = lane mask: strictly inside the radius
        movmskps eax, xmm3
        test    eax, eax
        jz      EXIT                            ; every lane has escaped
        subps   xmm0, xmm1                      ; xmm0 = zx^2 - zy^2
        movaps  xmm1, xmm2
if color
        ANDPS   xmm3, xmm7                      ; xmm6 += (inside radius) ? 4.0 : 0.0
        ADDPS   xmm6, xmm3
end if
if type = Julia
        ADDPS   xmm0, dqword[cx1]
        ADDPS   xmm1, dqword[cy1]
else if type = Mandel
        ADDPS   xmm0, xmm4
        ADDPS   xmm1, xmm5
end if
        dec     ecx
        jnz     ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table entry.
;21-22 cycles
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi], eax
        SHUFPS  xmm6, xmm6, 0E5h                ; lane 1 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+4], eax
        SHUFPS  xmm6, xmm6, 0E6h                ; lane 2 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+8], eax
        SHUFPS  xmm6, xmm6, 0E7h                ; lane 3 -> lane 0
        CVTSS2SI eax, xmm6
        mov     eax, [esi + eax]
        mov     [edi+12], eax
else
        ANDNPS  xmm3, dqword[maskbw]            ; (NOT mask) AND maskbw
        MOVAPS  [edi], xmm3                     ; needs a 16-byte aligned output buffer
end if
        add     edi, 16
        ADDPS   xmm4, dqword[dx1]
        sub     ebx, 4
        jnz     XLOOP
        ADDPS   xmm5, dqword[dy1]
        sub     edx, 1
        jnz     YLOOP
        pop     edi esi ebx
}
; MOVADD
; JuliaMandelPaintMOVADD color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: zy is doubled with ADDPS before the multiply by zx (two squares
; plus one product), and c is added after the escape test.
macro JuliaMandelPaintMOVADD color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0
MULPS xmm0, xmm0
MOVAPS xmm3, xmm1
ADDPS xmm1, xmm1
MULPS xmm3, xmm3
; xmm0 = zx^2 xmm1 = 2 * zy xmm2 = zx xmm3 = zy^2
MULPS xmm1, xmm2
MOVAPS xmm2, xmm0
ADDPS xmm2, xmm3
; xmm1 = 2*zx*zy xmm2 = zx^2 + zy^2 xmm3 = zy^2 (xmm0 becomes zx^2 - zy^2 two lines below)
CMPLEPS xmm2, xmm7
SUBPS xmm0, xmm3
MOVMSKPS eax, xmm2
test eax, eax
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT
if color
ANDPS xmm2, xmm7 ; xmm6 += (xmm2 <= radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2
end if
if type = Julia
ADDPS xmm1, dqword[cy1]
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm1, xmm5
ADDPS xmm0, xmm4
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
;21-22 cycles
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; MOVADD2
; JuliaMandelPaintMOVADD2 color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: c is added inside the arithmetic, before the escape test; the
; sum zx^2 + zy^2 is formed before the difference zx^2 - zy^2.
; The "n - m" trailing comments are kept from the original; they appear to
; be the author's per-instruction timing estimates.
macro JuliaMandelPaintMOVADD2 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0 ; 0 - 6
MULPS xmm0, xmm0 ; 0 - 6
MOVAPS xmm3, xmm1 ; 1 - 7
ADDPS xmm1, xmm1 ; 1 - 5
MULPS xmm1, xmm2 ; 6 - 12
MOVAPS xmm2, xmm0 ; 7 - 13
MULPS xmm3, xmm3 ; 8 - 14
; xmm0 = zx^2 xmm1 = 2*zx*zy xmm2 = zx^2 xmm3 = zy^2
if type = Julia
ADDPS xmm1, dqword[cy1]
else if type = Mandel
ADDPS xmm1, xmm5 ; 12 - 16
end if
ADDPS xmm2, xmm3 ; 14 - 18
SUBPS xmm0, xmm3 ; 16 - 20
; xmm0 = zx^2 - zy^2 xmm1 = new zy xmm2 = zx^2 + zy^2 xmm3 = zy^2
CMPLEPS xmm2, xmm7 ; 18 - 22
if type = Julia
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm0, xmm4 ; 20 - 24
end if
MOVMSKPS eax, xmm2 ; 22 - 28
test eax, eax
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT
if color
ANDPS xmm2, xmm7 ; 23 - 25 ; xmm6 += (xmm2 <= radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2 ; 26 - 30
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
;21-22 cycles
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; MOVADD2ps2dq
; JuliaMandelPaintMOVADD2ps2dq color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: same iteration loop as MOVADD2, but the colour lookup converts
; all four lanes at once with CVTPS2DQ (an SSE2 instruction) and reads the
; offsets back through the scratch buffer.
; The "n - m" trailing comments are kept from the original; they appear to
; be the author's per-instruction timing estimates.
macro JuliaMandelPaintMOVADD2ps2dq color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0 ; 0 - 6
MULPS xmm0, xmm0 ; 0 - 6
MOVAPS xmm3, xmm1 ; 1 - 7
ADDPS xmm1, xmm1 ; 1 - 5
MULPS xmm1, xmm2 ; 6 - 12
MOVAPS xmm2, xmm0 ; 7 - 13
MULPS xmm3, xmm3 ; 8 - 14
; xmm0 = zx^2 xmm1 = 2*zx*zy xmm2 = zx^2 xmm3 = zy^2
if type = Julia
ADDPS xmm1, dqword[cy1]
else if type = Mandel
ADDPS xmm1, xmm5 ; 12 - 16
end if
ADDPS xmm2, xmm3 ; 14 - 18
SUBPS xmm0, xmm3 ; 16 - 20
; xmm0 = zx^2 - zy^2 xmm1 = new zy xmm2 = zx^2 + zy^2 xmm3 = zy^2
CMPLEPS xmm2, xmm7 ; 18 - 22
if type = Julia
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm0, xmm4 ; 20 - 24
end if
MOVMSKPS eax, xmm2 ; 22 - 28
test eax, eax
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT
if color
ANDPS xmm2, xmm7 ; 23 - 25 ; xmm6 += (xmm2 <= radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2 ; 26 - 30
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert all four accumulators to integer byte offsets at once, spill them
; to scratch and copy the four table entries. ecx is free to use as a second
; temporary here because it is reloaded with ITER at XLOOP.
CVTPS2DQ xmm6, xmm6
MOVAPS dqword[scratch], xmm6
mov eax, [scratch]
mov ecx, [scratch+4]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi], eax
mov [edi+4], ecx
mov eax, [scratch+8]
mov ecx, [scratch+12]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi+8], eax
mov [edi+12], ecx
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; MOVADD3
; JuliaMandelPaintMOVADD3 color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: as MOVADD2, but the difference zx^2 - zy^2 is formed before the
; sum zx^2 + zy^2.
; The "n - m" trailing comments are kept from the original; they appear to
; be the author's per-instruction timing estimates.
macro JuliaMandelPaintMOVADD3 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0 ; 0 - 6
MULPS xmm0, xmm0 ; 0 - 6
MOVAPS xmm3, xmm1 ; 1 - 7
ADDPS xmm1, xmm1 ; 1 - 5
MULPS xmm1, xmm2 ; 6 - 12
MOVAPS xmm2, xmm0 ; 7 - 13
MULPS xmm3, xmm3 ; 8 - 14
; xmm0 = zx^2 xmm1 = 2*zx*zy xmm2 = zx^2 xmm3 = zy^2
if type = Julia
ADDPS xmm1, dqword[cy1]
else if type = Mandel
ADDPS xmm1, xmm5 ; 12 - 16
end if
SUBPS xmm0, xmm3 ; 14 - 18
ADDPS xmm2, xmm3 ; 16 - 20
; xmm0 = zx^2 - zy^2 xmm1 = new zy xmm2 = zx^2 + zy^2 xmm3 = zy^2
CMPLEPS xmm2, xmm7 ; 20 - 24
if type = Julia
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm0, xmm4 ; 18 - 22
end if
MOVMSKPS eax, xmm2 ; 24 - 30
test eax, eax
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT
if color
ANDPS xmm2, xmm7 ; 25 - 27 ; xmm6 += (xmm2 <= radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2 ; 28 - 32
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
;21-22 cycles
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; MOVADD4
; JuliaMandelPaintMOVADD4 color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: as MOVADD3, but the real part of c is added before CMPLEPS
; instead of after it.
; The "n - m" trailing comments are kept from the original; they appear to
; be the author's per-instruction timing estimates.
macro JuliaMandelPaintMOVADD4 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
MOVAPS xmm2, xmm0 ; 0 - 6
MULPS xmm0, xmm0 ; 0 - 6
MOVAPS xmm3, xmm1 ; 1 - 7
ADDPS xmm1, xmm1 ; 1 - 5
MULPS xmm1, xmm2 ; 6 - 12
MOVAPS xmm2, xmm0 ; 7 - 13
MULPS xmm3, xmm3 ; 8 - 14
; xmm0 = zx^2 xmm1 = 2*zx*zy xmm2 = zx^2 xmm3 = zy^2
if type = Julia
ADDPS xmm1, dqword[cy1]
else if type = Mandel
ADDPS xmm1, xmm5 ; 12 - 16
end if
SUBPS xmm0, xmm3 ; 14 - 18
ADDPS xmm2, xmm3 ; 16 - 20
; xmm0 = zx^2 - zy^2 xmm1 = new zy xmm2 = zx^2 + zy^2 xmm3 = zy^2
if type = Julia
ADDPS xmm0, dqword[cx1]
else if type = Mandel
ADDPS xmm0, xmm4 ; 18 - 22
end if
CMPLEPS xmm2, xmm7 ; 20 - 24
MOVMSKPS eax, xmm2 ; 24 - 30
test eax, eax
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT
if color
ANDPS xmm2, xmm7 ; 25 - 27 ; xmm6 += (xmm2 <= radius) ? 4.0 : 0.0;
ADDPS xmm6, xmm2 ; 28 - 32
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
;21-22 cycles
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; vod
; JuliaMandelPaintvod color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: zy^2 is computed first, zx*zy is formed in xmm1 and doubled with
; ADDPS after CMPLEPS; c is added before the escape test.
; The trailing "n - m unit" comments are kept from the original; they appear
; to be the author's timing and execution-unit estimates.
macro JuliaMandelPaintvod color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
; After the block below: xmm0 = zx^2 - zy^2 (+ cx), xmm1 = 2*zx*zy (+ cy),
; xmm2 = mask of lanes with zx^2 + zy^2 <= radius, xmm3 = zy^2.
MOVAPS xmm2, xmm1 ; 0 - 6 mov
MULPS xmm2, xmm2 ; 6 - 12 fp:mul
MULPS xmm1, xmm0 ; 0 - 6 fp:mul
MOVAPS xmm3, xmm2 ; 13 - 19 mov
MULPS xmm0, xmm0 ; 2 - 8 fp:mul
ADDPS xmm2, xmm0 ; 12 - 16 fp:add
CMPLEPS xmm2, xmm7 ; 16 - 20 fp:add
ADDPS xmm1, xmm1 ; 6 - 10 fp:add
SUBPS xmm0, xmm3 ; 19 - 23 fp:add
if type = Julia
ADDPS xmm1, dqword[cy1] ; 10 - 14 fp:add
ADDPS xmm0, dqword[cx1] ; 23 - 27 fp:add
else if type = Mandel
ADDPS xmm1, xmm5
ADDPS xmm0, xmm4
end if
MOVMSKPS eax, xmm2 ; 20 - 26 fp
test eax, eax ; 26 - 27 alu0/1
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT ; 26 - 27 alu0/1
if color
; xmm6 += (inside radius) ? 4.0 : 0.0
ANDPS xmm2, xmm7 ; 21 - 23 mmx:alu
ADDPS xmm6, xmm2 ; 24 - 28 fp:add
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
;CVTPS2DQ xmm6, xmm6
;MOVAPS [edi], xmm6
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; vod2
; JuliaMandelPaintvod2 color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: as vod, but the doubling ADDPS xmm1, xmm1 is placed before
; CMPLEPS instead of after it.
; The trailing "n - m unit" comments are kept from the original; they appear
; to be the author's timing and execution-unit estimates.
macro JuliaMandelPaintvod2 color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
; After the block below: xmm0 = zx^2 - zy^2 (+ cx), xmm1 = 2*zx*zy (+ cy),
; xmm2 = mask of lanes with zx^2 + zy^2 <= radius, xmm3 = zy^2.
MOVAPS xmm2, xmm1 ; 0 - 6 mov
MULPS xmm2, xmm2 ; 6 - 12 fp:mul
MULPS xmm1, xmm0 ; 0 - 6 fp:mul
MOVAPS xmm3, xmm2 ; 13 - 19 mov
MULPS xmm0, xmm0 ; 2 - 8 fp:mul
ADDPS xmm2, xmm0 ; 12 - 16 fp:add
ADDPS xmm1, xmm1 ; 6 - 10 fp:add
CMPLEPS xmm2, xmm7 ; 16 - 20 fp:add
SUBPS xmm0, xmm3 ; 19 - 23 fp:add
if type = Julia
ADDPS xmm1, dqword[cy1] ; 10 - 14 fp:add
ADDPS xmm0, dqword[cx1] ; 23 - 27 fp:add
else if type = Mandel
ADDPS xmm1, xmm5
ADDPS xmm0, xmm4
end if
MOVMSKPS eax, xmm2 ; 20 - 26 fp
test eax, eax ; 26 - 27 alu0/1
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT ; 26 - 27 alu0/1
if color
; xmm6 += (inside radius) ? 4.0 : 0.0
ANDPS xmm2, xmm7 ; 21 - 23 mmx:alu
ADDPS xmm6, xmm2 ; 24 - 28 fp:add
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert each lane of xmm6 to an integer byte offset and copy the table
; entry; the shuffles bring lanes 1, 2 and 3 into lane 0 in turn.
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi], eax
SHUFPS xmm6, xmm6, 0E5h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+4], eax
SHUFPS xmm6, xmm6, 0E6h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+8], eax
SHUFPS xmm6, xmm6, 0E7h
CVTSS2SI eax, xmm6
mov eax, [esi + eax]
mov [edi+12], eax
;CVTPS2DQ xmm6, xmm6
;MOVAPS [edi], xmm6
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; vodps2dq
; JuliaMandelPaintvodps2dq color, type
; Draws [h] rows of [w] pixels into the buffer at [bits], four pixels per
; pass, iterating z = z^2 + c at most ITER times per group.
;   color - nonzero: xmm6 gains 4.0 for every iteration in which a lane is
;           still inside the radius and is then used as a byte offset into
;           the table at [a]; zero: pixels are written as (NOT xmm2) AND maskbw.
;   type  - Julia: c = (cx2, cy2); Mandel: c = the pixel coordinate.
;           z starts at the pixel coordinate in both cases.
; ebx, esi and edi are saved and restored; eax, ecx, edx, xmm0-xmm7 are used.
; The X loop counts down in steps of 4 and ends only on exactly zero.
; Variant: same iteration loop as vod, but the colour lookup converts all
; four lanes at once with CVTPS2DQ (an SSE2 instruction) and reads the
; offsets back through the scratch buffer.
; The trailing "n - m unit" comments are kept from the original; they appear
; to be the author's timing and execution-unit estimates.
macro JuliaMandelPaintvodps2dq color, type
{
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
push ebx esi edi
mov edx, [h]
mov esi, [a]
mov edi, [bits]
; Broadcast the scalar parameters into all four lanes.
if type = Julia
copyscr xmm0, cx2
copyscr xmm1, cy2
end if
copyscr xmm2, dx2
copyscr xmm3, dy2
if type = Julia
MOVAPS dqword[cx1], xmm0
MOVAPS dqword[cy1], xmm1
end if
MOVAPS dqword[dy1], xmm3
copyscr xmm4, LEFT
copyscr xmm5, TOP
; Build the lane offsets in xmm0 and add them to LEFT.
MOVAPS xmm0, xmm2 ; xmm0 will become 0 | dx2 | dx2 * 2 | dx2 * 3
ANDPS xmm0, dqword[mask1]
MOVAPS xmm1, xmm2
ANDPS xmm1, dqword[mask2]
ADDPS xmm0, xmm1
ADDPS xmm0, xmm2
ANDPS xmm0, dqword[mask]
ADDPS xmm4, xmm0
MOVAPS dqword[left1], xmm4
; dx1 = 4 * dx2: the X step for a group of four pixels.
ADDPS xmm2, xmm2
ADDPS xmm2, xmm2
MOVAPS dqword[dx1], xmm2
MOVAPS xmm7, dqword[radius]
JMP BSTART
; xmm0 = zx xmm1 = zy xmm2 = tmp xmm3 = tmp xmm4 = pixel x (4 lanes) xmm5 = pixel y xmm6 = result xmm7 = 4.0
; eax = tmp ebx = x ecx = i counter edx = y edi = bits pointer esi = color table
YLOOP:
MOVAPS xmm4, dqword[left1]
BSTART:
mov ebx, [w]
XLOOP:
; Start a new group of four pixels: z = pixel coordinate, accumulator = 0.
MOVAPS xmm0,xmm4
XORPS xmm6,xmm6
MOVAPS xmm1,xmm5
mov ecx, ITER
ILOOP:
; xmm0 = zx xmm1 = zy
; After the block below: xmm0 = zx^2 - zy^2 (+ cx), xmm1 = 2*zx*zy (+ cy),
; xmm2 = mask of lanes with zx^2 + zy^2 <= radius, xmm3 = zy^2.
MOVAPS xmm2, xmm1 ; 0 - 6 mov
MULPS xmm2, xmm2 ; 6 - 12 fp:mul
MULPS xmm1, xmm0 ; 0 - 6 fp:mul
MOVAPS xmm3, xmm2 ; 13 - 19 mov
MULPS xmm0, xmm0 ; 2 - 8 fp:mul
ADDPS xmm2, xmm0 ; 12 - 16 fp:add
CMPLEPS xmm2, xmm7 ; 16 - 20 fp:add
ADDPS xmm1, xmm1 ; 6 - 10 fp:add
SUBPS xmm0, xmm3 ; 19 - 23 fp:add
if type = Julia
ADDPS xmm1, dqword[cy1] ; 10 - 14 fp:add
ADDPS xmm0, dqword[cx1] ; 23 - 27 fp:add
else if type = Mandel
ADDPS xmm1, xmm5
ADDPS xmm0, xmm4
end if
MOVMSKPS eax, xmm2 ; 20 - 26 fp
test eax, eax ; 26 - 27 alu0/1
; Leave the iteration loop once no lane is inside the radius any more.
jz EXIT ; 26 - 27 alu0/1
if color
; xmm6 += (inside radius) ? 4.0 : 0.0
ANDPS xmm2, xmm7 ; 21 - 23 mmx:alu
ADDPS xmm6, xmm2 ; 24 - 28 fp:add
end if
sub ecx, 1
jnz ILOOP
EXIT:
if color
; Convert all four accumulators to integer byte offsets at once, spill them
; to scratch and copy the four table entries. ecx is free to use as a second
; temporary here because it is reloaded with ITER at XLOOP.
CVTPS2DQ xmm6, xmm6
MOVAPS dqword[scratch], xmm6
mov eax, [scratch]
mov ecx, [scratch+4]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi], eax
mov [edi+4], ecx
mov eax, [scratch+8]
mov ecx, [scratch+12]
mov eax, [esi + eax]
mov ecx, [esi + ecx]
mov [edi+8], eax
mov [edi+12], ecx
;CVTPS2DQ xmm6, xmm6
;MOVAPS [edi], xmm6
else
; xmm2 still holds the last compare mask here; MOVAPS needs edi 16-byte aligned.
ANDNPS xmm2, dqword[maskbw]
MOVAPS [edi], xmm2
end if
add edi, 16
ADDPS xmm4, dqword[dx1]
sub ebx, 4
jnz XLOOP
ADDPS xmm5, dqword[dy1]
sub edx, 1
jnz YLOOP
pop edi esi ebx
}
; testone name
; Times JuliaMandelPaint<name> once in Julia mode and once in Mandel mode
; (both with colour output), formats the two cycle counts with the format
; string m_<name> and writes the line to the console and to the log file.
; Clobbers eax, ebx, ecx, edx (CPUID/RDTSC and the API calls) and xmm0-xmm7.
;
; Fixes relative to the original:
;  * wsprintf is a C-convention (caller cleans up) variadic function, so it
;    is called with cinvoke; invoke left its four arguments on the stack
;    after every call.
;  * the log file is written with the length returned by wsprintf rather
;    than with [read], which WriteConsole only fills in when it succeeds
;    (it fails when standard output is redirected to a file).
macro testone name
{
        startm
        JuliaMandelPaint#name 1, Julia
        endm
        mov     [jt], eax                       ; keep the Julia timing for the report
        startm
        JuliaMandelPaint#name 1, Mandel
        endm                                    ; eax = Mandel timing
        cinvoke wsprintf, buff, m_#name, [jt], eax
        push    eax                             ; text length, needed again for the log file
        invoke  WriteConsole, [hOut], buff, eax, read, 0
        pop     eax
        invoke  WriteFile, [hFile], buff, eax, read, 0
}
;-----------------------------------------------------------------------
; Program entry.
;  1. Derives the log file name "<directory of the executable>\tst.log"
;     from the command line (or from the current directory when the
;     command line carries no path) and opens it for appending.
;  2. Writes the CPU description to the console and to the log.
;  3. Requires SSE, probes SSE2, raises the process/thread priority and
;     runs every benchmark variant [i] times.
; Fix relative to the original: the log write of the CPU description uses
; the length returned by PrintSysInfo instead of [read], which WriteConsole
; only fills in when it succeeds (it fails when standard output is
; redirected to a file), so the log no longer stays empty in that case.
;-----------------------------------------------------------------------
        invoke  GetStdHandle, STD_OUTPUT_HANDLE
        mov     [hOut], eax

; --- Copy the program path (first command line token) into buff ---
        invoke  GetCommandLine
        mov     edx, buff
        mov     ebx, ' '                        ; ebx = terminator for an unquoted path
        cmp     byte[eax], '"'                  ; quoted path: skip the quote, stop at the next one
        jnz     @F
        inc     eax
        mov     ebx, '"'
@@:                                             ; copy loop, the terminator is copied too
        movzx   ecx, byte[eax]
        mov     [edx], cl
        add     eax, 1
        add     edx, 1
        or      ecx, ecx                        ; end of the command line
        jz      @F
        cmp     ecx, ebx
        jnz     @B
@@:
; --- Search backward for the last '\' or '/' ---
        lea     eax, [edx-1]                    ; eax -> terminator that was copied last
        mov     edx, buff
@@:
        sub     eax, 1
        cmp     byte[eax], '\'
        je      slashfound
        cmp     byte[eax], '/'
        je      slashfound
        cmp     eax, edx
        ja      @B
; No path in the command line: use the current directory instead.
        invoke  GetCurrentDirectory, 256, buff
        add     eax, buff                       ; eax -> terminating zero of the directory
        cmp     byte[eax-1], '\'
        jz      @F                              ; already ends with a backslash
        mov     byte[eax], '\'
slashfound:
        inc     eax                             ; eax -> first byte after the separator
@@:
        mov     dword[eax], 'tst.'
        mov     dword[eax+4], 'log'             ; fourth byte of the constant is the terminating zero

; --- Open the log for appending ---
        invoke  CreateFile, buff, GENERIC_WRITE, 0, NULL, OPEN_ALWAYS, FILE_ATTRIBUTE_NORMAL, 0
        cmp     eax, INVALID_HANDLE_VALUE
        jz      errnoinfo
        mov     [hFile], eax
        invoke  SetFilePointer, eax, 0, 0, FILE_END

; --- Report the CPU (PrintSysInfo overwrites buff, eax = text length) ---
        call    PrintSysInfo
        push    eax                             ; keep the length for the log write
        invoke  WriteConsole, [hOut], buff, eax, read, 0
        pop     eax
        invoke  WriteFile, [hFile], buff, eax, read, 0

; --- Feature checks ---
        call    IsSSE
        or      eax, eax
        jz      errnosse
        call    IsSSE2
        mov     [sse2flag], eax

; --- Run the benchmarks at the highest priority ---
        invoke  GetCurrentProcess
        invoke  SetPriorityClass, eax, REALTIME_PRIORITY_CLASS
        invoke  GetCurrentThread
        invoke  SetThreadPriority, eax, THREAD_PRIORITY_TIME_CRITICAL
mainloop:
        testone MUL
        testone FFFF
        testone MOVADD
        testone MOVADD2
        mov     eax, [sse2flag]                 ; the ps2dq variants need SSE2
        or      eax, eax
        jz      @F
        testone MOVADD2ps2dq
@@:
        testone MOVADD3
        testone MOVADD4
        testone vod
        mov     eax, [sse2flag]
        or      eax, eax
        jz      @F
        testone vodps2dq
@@:
        testone vod2
        dec     [i]
        jnz     mainloop

; --- Restore the priority and finish ---
        invoke  GetCurrentProcess
        invoke  SetPriorityClass, eax, NORMAL_PRIORITY_CLASS
        invoke  GetCurrentThread
        invoke  SetThreadPriority, eax, THREAD_PRIORITY_NORMAL
        invoke  Sleep, 500
        invoke  CloseHandle, [hFile]
        invoke  ExitProcess, 0

; The log file could not be opened.
errnoinfo:
        invoke  WriteConsole, [hOut], msgnoinfo, msgnosse-msgnoinfo, read, 0
        invoke  ExitProcess, 0

; The processor (or the operating system) does not support SSE.
errnosse:
        invoke  WriteConsole, [hOut], msgnosse, bits-msgnosse, read, 0
        invoke  CloseHandle, [hFile]
        invoke  ExitProcess, 0
; tohex reg
; Replaces reg by the ASCII code of the hexadecimal digit ('0'-'9', 'A'-'F')
; for its low four bits. Uses one anonymous label and changes the flags.
macro tohex reg
{
        and     reg, 0xF                        ; keep the low nibble
        add     reg, '0'                        ; 0..15 -> '0'..'?'
        cmp     reg, '9'
        jbe     @F                              ; decimal digit: done
        add     reg, 'A' - '9' - 1              ; skip the codes between '9' and 'A'
@@:
}
;-----------------------------------------------------------------------
; PrintSysInfo
; Writes a description of the processor into buff:
;   "<vendor> 0x<family><model><stepping> brand index=<hh>",10
;   followed, when CPUID leaves 80000002h-80000004h exist, by the 48 byte
;   brand string and another line feed.
; Other outputs: "Unknown CPU:<12 vendor bytes>",10 for an unrecognised
; vendor, "386 or 486",10 when CPUID is not available.
; Out:   eax = number of bytes stored in buff
; Clobb: ecx, edx, flags (ebx and esi are preserved)
;
; Fixes relative to the original:
;  * the low digit of the brand index was converted in ebx but al (the
;    high digit) was stored a second time; bl is stored now.
;  * the AMD prefix constant had only three characters, which put a zero
;    byte in the middle of the text; it is padded to four characters.
;  * ebx, overwritten by CPUID, is saved and restored.
;-----------------------------------------------------------------------
PrintSysInfo:
        push    ebx
        push    esi
        mov     esi, buff

; CPUID exists only if bit 21 (ID) of EFLAGS can be toggled.
        pushfd
        pop     eax
        mov     edx, eax
        xor     eax, 00200000h
        push    eax
        popfd
        pushfd
        pop     eax
        xor     eax, edx
        jz      oldcpu                          ; 386 or 486, can't use CPUID

; Leaf 0: the vendor string is returned in ebx, edx, ecx.
        xor     eax, eax
        cpuid
        cmp     ebx, 'Genu'
        jnz     nonintel
        cmp     edx, 'ineI'
        jnz     nonintel
        cmp     ecx, 'ntel'
        jnz     nonintel
        mov     dword[esi], 'Inte'
        mov     dword[esi+4], 'l 0x'
getver:
; Leaf 1: eax = version information, low byte of ebx = brand index.
        xor     eax, eax
        inc     eax
        cpuid
        mov     ecx, eax
        shr     ecx, 8                          ; family ID
        tohex   ecx
        mov     byte[esi+8], cl
        mov     ecx, eax
        shr     ecx, 4                          ; model
        tohex   ecx
        mov     byte[esi+9], cl
        and     eax, 0xF                        ; stepping
        tohex   eax
        mov     byte[esi+10], al
        mov     byte[esi+11], ' '
        mov     dword[esi+12], 'bran'
        mov     dword[esi+16], 'd in'
        mov     dword[esi+20], 'dex='
        mov     eax, ebx
        shr     eax, 4                          ; high digit of the brand index
        tohex   eax
        mov     byte[esi+24], al
        tohex   ebx                             ; low digit of the brand index
        mov     byte[esi+25], bl
        mov     byte[esi+26], 10

; Brand string, available when the highest extended leaf is >= 80000004h.
        mov     eax, 0x80000000
        cpuid
        cmp     eax, 0x80000004
        jb      nobrandstr                      ; brand string not supported
        mov     eax, 0x80000002
        cpuid
        mov     dword[esi+27], eax
        mov     dword[esi+31], ebx
        mov     dword[esi+35], ecx
        mov     dword[esi+39], edx
        mov     eax, 0x80000003
        cpuid
        mov     dword[esi+43], eax
        mov     dword[esi+47], ebx
        mov     dword[esi+51], ecx
        mov     dword[esi+55], edx
        mov     eax, 0x80000004
        cpuid
        mov     dword[esi+59], eax
        mov     dword[esi+63], ebx
        mov     dword[esi+67], ecx
        mov     dword[esi+71], edx
        mov     byte[esi+75], 10
        mov     eax, 76
        jmp     sysinfodone
nobrandstr:
        mov     eax, 27
        jmp     sysinfodone
nonintel:
        cmp     ebx, 'Auth'
        jnz     nonamd
        cmp     edx, 'enti'
        jnz     nonamd
        cmp     ecx, 'cAMD'
        jnz     nonamd
        mov     dword[esi], 'AMD '
        mov     dword[esi+4], '  0x'            ; four characters, same prefix width as Intel
        jmp     getver
nonamd:
        mov     dword[esi], 'Unkn'
        mov     dword[esi+4], 'own '
        mov     dword[esi+8], 'CPU:'
        mov     dword[esi+12], ebx              ; raw vendor string
        mov     dword[esi+16], edx
        mov     dword[esi+20], ecx
        mov     dword[esi+24], 10
        mov     eax, 25
        jmp     sysinfodone
oldcpu:
        mov     dword[esi], '386 '
        mov     dword[esi+4], 'or 4'
        mov     dword[esi+8], 0x000A3638        ; '8', '6', line feed, 0
        mov     eax, 11
sysinfodone:
        pop     esi
        pop     ebx
        ret
; IsSSE
; Returns eax = 1 when SSE can be used, eax = 0 otherwise.
; Three checks: CPUID must exist (EFLAGS bit 21 can be toggled), CPUID leaf 1
; must report bit 25 of edx, and an SSE instruction must execute without an
; illegal-instruction exception (caught by SEHhandler, which resumes at nosse).
; Overwrites ebx, ecx and edx (CPUID outputs); xmm0 is cleared on success.
; The labels nosse and nosse2 are also used from SEHhandler and IsSSE2.
IsSSE:
; Try to toggle the ID flag.
pushfd
pop eax
mov edx, eax
xor eax, 00200000h
push eax
popfd
pushfd
pop eax
xor eax, edx
; eax is 0 here when the flag could not be changed, which is also the result.
jz nosse2 ; 386 or 486, can't use CPUID
xor eax, eax
inc eax
cpuid
; eax = 0 is the result for every failure path below.
xor eax, eax
test edx, 02000000h
jz nosse2 ; SSE not supported
; Install SEHhandler as the first handler in the chain at FS:[0].
push SEHhandler ; try to execute an SSE instruction
push dword[FS:0]
mov [FS:0], esp
XORPS xmm0, xmm0
; No exception: unlink the handler and report success.
pop dword[FS:0]
pop edx
inc eax
ret
; Execution resumes here when SEHhandler has caught the exception;
; eax is still 0 unless the saved context was changed.
nosse:
pop dword[FS:0]
pop edx
nosse2:
ret
; IsSSE2
; Returns eax = 1 when CPUID leaf 1 reports SSE2 (bit 26 of edx), else 0.
; CPUID is used without an availability check, so this is meant to be
; called only after IsSSE has succeeded.
; Overwrites ebx, ecx and edx (CPUID outputs).
IsSSE2:
        mov     eax, 1
        cpuid
        mov     eax, edx
        shr     eax, 26                         ; move the SSE2 bit to bit 0
        and     eax, 1
        ret
; Field layouts used by SEHhandler. Both are declared "virtual at eax", so
; they emit no bytes: each field name stands for eax + field offset and is
; valid only while eax points at the corresponding structure.

; Exception record (first argument of the handler).
virtual at eax
EXCEPTION_RECORD:
.ExceptionCode dd ?
.ExceptionFlag dd ?
.NestedExceptionRecord dd ?
.ExceptionAddress dd ?
.NumberParameters dd ?
.AdditionalData dd ?
end virtual
; Thread context (third argument of the handler); only .GPReip is used.
virtual at eax
CONTEXT:
.ContextFlags dd ?
;DEBUG REGISTERS
.Dr dd 6 dup ?
;FLOATING POINT
.ControlWord dd ?
.StatusWord dd ?
.TagWord dd ?
.ErrorOffset dd ?
.ErrorSelector dd ?
.DataOffset dd ?
.DataSelector dd ?
.FPURegs db 80 dup ?
.Cr0NpxState dd ?
;SEGMENT REGISTERS
.SegGs dd ?
.SegFs dd ?
.SegEs dd ?
.SegDs dd ?
;GENERAL-PURPOSE REGISTERS
.GPRedi dd ?
.GPResi dd ?
.GPRebx dd ?
.GPRedx dd ?
.GPRecx dd ?
.GPReax dd ?
.GPRebp dd ?
.GPReip dd ?
.SegCs dd ?
.GRPflags dd ?
.GRPesp dd ?
.SegSS dd ?
end virtual
; SEHhandler
; Exception handler installed by IsSSE around its SSE probe instruction.
; For an illegal-instruction exception whose flags dword is zero it
; rewrites the saved instruction pointer to nosse and returns 0, so
; execution resumes there; for anything else it returns 1 and leaves the
; exception to the next handler.
; In:  [esp+4]  = address of the exception record
;      [esp+12] = address of the thread context
; Out: eax = 0 (resume with the modified context) or 1 (not handled)
SEHhandler:
mov eax, [esp+04] ; get EXCEPTION_RECORD structure address
cmp [EXCEPTION_RECORD.ExceptionCode], STATUS_ILLEGAL_INSTRUCTION
jnz nexthandler
; Only handle the exception when no exception flags are set.
mov eax, [EXCEPTION_RECORD.ExceptionFlag]
or eax, eax
jnz nexthandler
mov eax, [esp+12] ; get CONTEXT structure address
; Resume at IsSSE's failure path instead of at the faulting instruction.
mov [CONTEXT.GPReip], nosse
xor eax, eax
ret
nexthandler: ; allow system handler to show error message
mov eax, 1
ret
data import
library kernel32,'KERNEL32.DLL',\
user32,'USER32.DLL'
include '%fasminc%\apia\kernel32.inc'
include '%fasminc%\apia\user32.inc'
end data
;-----------------------------------------------------------------------
; Data.
; Everything from radius to scratch is accessed with MOVAPS/ANDPS memory
; operands; each item is exactly 16 bytes, so the single align 16 keeps all
; of them 16-byte aligned. Do not insert anything of another size between
; them.
; Fix relative to the original: realbits is now aligned to 16 bytes as
; well, because the colour = 0 path of the paint macros stores pixels with
; MOVAPS [edi].
;-----------------------------------------------------------------------
align 16
radius dd 4.0, 4.0, 4.0, 4.0                    ; escape threshold, also the colour step (4 bytes per table entry)
cx1 dd 4 dup ?                                  ; Julia constant, real part, broadcast
cy1 dd 4 dup ?                                  ; Julia constant, imaginary part, broadcast
dx1 dd 4 dup ?                                  ; 4 * dx2, X step per pixel group
dy1 dd 4 dup ?                                  ; dy2 broadcast, Y step per row
left1 dd 4 dup ?                                ; x coordinates of the first pixel group of a row
cc dd 1.0, 1.0, 1.0, 1.0                        ; not referenced in this file
maskbw dd 0x808080, 0x808080, 0x808080, 0x808080 ; pixel value of the colour = 0 path
mask dd 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF   ; lanes 1..3
mask1 dd 0, 0, 0, 0xFFFFFFFF                    ; lane 3
mask2 dd 0, 0, 0xFFFFFFFF, 0xFFFFFFFF           ; lanes 2..3
scratch dd 0,0,0,0                              ; spill area of the CVTPS2DQ colour lookup
; Colour table: one dword per iteration count 0..ITER (65 entries, the last is 0).
reala dd 0x00323232, 0x00353630, 0x00383a2e, 0x003b3e2c, 0x003e422a,\
0x00414628, 0x00444a26, 0x00474e24, 0x004a5222, 0x004d5620,\
0x00505a1e, 0x00535e1c, 0x0056621a, 0x00596618, 0x005c6a16,\
0x005f6e14, 0x00627212, 0x00657610, 0x00687a0e, 0x006b7e0c,\
0x006e820a, 0x00718608, 0x00748a06, 0x00778e04, 0x007a9202,\
0x007d9600, 0x00809a02, 0x00839e04, 0x0086a206, 0x0089a608,\
0x008caa0a, 0x008fae0c, 0x0092b20e, 0x0095b610, 0x0098ba12,\
0x009bbe14, 0x009ec216, 0x00a1c618, 0x00a4ca1a, 0x00a7ce1c,\
0x00aad21e, 0x00add620, 0x00b0da22, 0x00b3de24, 0x00b6e226,\
0x00b9e628, 0x00bcea2a, 0x00bfee2c, 0x00c2ee2e, 0x00c5ea30,\
0x00c8e632, 0x00cbe234, 0x00cede36, 0x00d1da38, 0x00d4d63a,\
0x00d7d23c, 0x00dace3e, 0x00ddca40, 0x00e0c642, 0x00e3c244,\
0x00e6be46, 0x00e9ba48, 0x00ecb64a, 0x00efb24c, 0x00000000
; Scalar image parameters (broadcast by the paint macros with copyscr).
LEFT dd -2.0                                    ; x coordinate of the first column
TOP dd -1.0                                     ; y coordinate of the first row
cx2 dd -0.12                                    ; Julia constant, real part
cy2 dd 0.74                                     ; Julia constant, imaginary part
w dd 1024                                       ; width in pixels, must be a multiple of 4
h dd 719                                        ; height in pixels
dx2 dd 0.0029296875000000000                    ; x step per pixel
dy2 dd 0.0027816411682892906                    ; y step per row
; wsprintf format strings, one per benchmark variant (Julia cycles, Mandel cycles).
m_MUL db "MUL %10d %10d",10,0
m_FFFF db "FFFF %10d %10d",10,0
m_MOVADD db "MOVADD %10d %10d",10,0
m_MOVADD2 db "MOVADD2 %10d %10d",10,0
m_MOVADD2ps2dq db "MOVADD2ps2dq %10d %10d",10,0
m_MOVADD3 db "MOVADD3 %10d %10d",10,0
m_MOVADD4 db "MOVADD4 %10d %10d",10,0
m_vod db "vod %10d %10d",10,0
m_vodps2dq db "vodps2dq %10d %10d",10,0
m_vod2 db "vod2 %10d %10d",10,0
; Error messages. Their lengths are computed as label differences
; (msgnosse-msgnoinfo and bits-msgnosse), so msgnoinfo, msgnosse and bits
; have to stay adjacent and in this order.
msgnoinfo db "Can't write log file. Copy the program to writable disk.",10
msgnosse db "Your processor doesn't support SSE. The test can't continue, sorry.",10
bits dd realbits                                ; pointer to the pixel buffer
a dd reala                                      ; pointer to the colour table
align 4
b dd ?                                          ; time-stamp counter value saved by startm
jt dd ?                                         ; Julia timing of the current variant
hOut dd ?                                       ; console output handle
hFile dd ?                                      ; log file handle
read dd ?                                       ; byte/character count returned by the write calls
sse2flag dd ?                                   ; result of IsSSE2
i dd 5                                          ; number of passes over all variants
buff db 256 dup ?                               ; path and text buffer
align 16                                        ; MOVAPS [edi] in the colour = 0 path needs this
realbits dd 1024*768 dup ?                      ; pixel buffer, one dword per pixel