Click here to Skip to main content
Click here to Skip to main content
Add your own
alternative version
Go to top

Generating Fractals with SSE/SSE2

, 29 Nov 2005
An article on generating Mandelbrot and Julia sets using Intel's Streaming SIMD Extensions (SSE, SSE2).
fractalssse_src.zip
zoom.cur
bitmap1.bmp
DEFAULT1.BIN
fractals.exe
fractals.ICO
SSEroutines_test.exe
; Rabbit - 78.8
; Mandel - 90.5
format MS COFF

include '%fasminc%\win32a.inc'
section '.data' data readable writeable align 16
align 16
radius dd 4.0, 4.0, 4.0, 4.0
radiusd dq 4.0, 4.0
px1 dd 4 dup ?
py1 dd 4 dup ?
dx1 dd 4 dup ?
dy1 dd 4 dup ?
left1 dd 4 dup ?
maskbw dd 0x808080, 0x808080, 0x808080, 0x808080
mask   dd 0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF
mask1  dd 0, 0, 0, 0xFFFFFFFF
mask2  dd 0, 0, 0xFFFFFFFF, 0xFFFFFFFF
useMovAdd dd 1
ITER = 64 ; The number of iterations
Julia  = 1
Mandel = 0

section '.text' code readable executable
macro defineinstr instr {
   macro instr [args] \{
   \common
      if ssewidth = 4
	 instr#S args
      else
	 instr#D args
      end if
   \}
}
defineinstr MOVAP
defineinstr XORP
defineinstr ADDP
defineinstr MULP
defineinstr SUBP
defineinstr ANDP
defineinstr MOVMSKP
defineinstr ANDNP
defineinstr CMPLEP

macro copyscr reg, from {
   if ssewidth = 4
      MOVSS    reg, dword[from]
      SHUFPS   reg, reg, 0
   else
      MOVSD    reg, qword[from]
      SHUFPD   reg, reg, 0
   end if
}

macro mycvt a, x {
   if ssewidth = 4
      CVTTSS2SI eax, x
   else
      CVTTSD2SI eax, x
   end if
      mov eax, [esi + eax]
      mov a, eax
}

macro ConvertAndWrite color {
     if color
       if ssewidth = 4
	mycvt [edi], xmm6
	SHUFPS xmm6, xmm6, 0E5h
	mycvt [edi+4], xmm6
	SHUFPS xmm6, xmm6, 0E6h
	mycvt [edi+8], xmm6
	SHUFPS xmm6, xmm6, 0E7h
	mycvt [edi+12], xmm6
       else if ssewidth = 2
	mycvt [edi], xmm6
	SHUFPD xmm6, xmm6, 1
	mycvt [edi+4], xmm6
       end if
     else if ~color
	ANDNP  xmm2, dqword[maskbw]
       if ssewidth = 4
	MOVAPS	[edi], xmm2
       else
	SHUFPS	xmm2, xmm2, 08h
	MOVLPS	[edi], xmm2
       end if
     end if
}

macro JuliaMandelPaintMovAdd color, type {
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAP xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAP xmm0,xmm4
	XORP  xmm6,xmm6
	MOVAP xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAP xmm2, xmm0       ;  0 -  6
	MULP  xmm0, xmm0       ;  0 -  6
	MOVAP xmm3, xmm1       ;  1 -  7
	ADDP  xmm1, xmm1       ;  1 -  5
	MULP  xmm1, xmm2       ;  6 - 12
	MOVAP xmm2, xmm0       ;  7 - 13
	MULP  xmm3, xmm3       ;  8 - 14
	; xmm0 = zx^2           xmm1 = 2 * zy      xmm2 = zx           xmm3 = zy^2
     if type = Julia
	ADDP  xmm1, dqword[py1]
     else if type = Mandel
	ADDP  xmm1, xmm5       ; 12 - 16
     end if
	SUBP  xmm0, xmm3       ; 14 - 18
	ADDP  xmm2, xmm3       ; 16 - 20
	; xmm0 = zx^2 - zy^2    xmm1 = 2*zx*zy     xmm2 = zx^2 + zy^2  xmm3 = zy^2
	CMPLEP	xmm2, xmm7     ; 20 - 24
     if type = Julia
	ADDP  xmm0, dqword[px1]
     else if type = Mandel
	ADDP  xmm0, xmm4       ; 18 - 22
     end if
	MOVMSKP eax, xmm2      ; 24 - 30
	test eax, eax
	jz EXIT
     if color
	ANDP  xmm2, xmm7       ; 25 - 27 ; xmm6 += (xmm2 < radius) ? 4.0 : 0.0;
	ADDP  xmm6, xmm2       ; 28 - 32
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
	ConvertAndWrite color
	add edi, ssewidth * 4
	ADDP  xmm4, dqword[dx1]
	sub ebx, ssewidth
	ja XLOOP

	ADDP  xmm5, dqword[dy1]
	add edi, [piw]
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx
	ret
}

macro JuliaMandelPaintVod color, type {
local YLOOP, XLOOP, ILOOP, EXIT, BSTART
	; xmm0 = zx    xmm1 = zy    xmm2 = tmp    xmm3 = tmp    xmm4 = zx2    xmm5 = zy2    xmm6 = result   xmm7 = 4.0
	; eax = tmp    ebx = x      ecx = i counter      edx = y      edi = bits pointer    esi = color table
YLOOP:
	MOVAP xmm4, dqword[left1]
BSTART:
	mov ebx, [w]
XLOOP:
	MOVAP xmm0,xmm4
	XORP  xmm6,xmm6
	MOVAP xmm1,xmm5
	mov ecx, ITER
ILOOP:
	; xmm0 = zx             xmm1 = zy
	MOVAP xmm2, xmm1	 ; 0  - 6       mov
	MULP  xmm2, xmm2	 ; 6  - 12      fp:mul
	MULP  xmm1, xmm0	 ; 0  - 6       fp:mul

	MOVAP xmm3, xmm2	 ; 13 - 19      mov
	MULP  xmm0, xmm0	 ; 2  - 8       fp:mul
	ADDP  xmm2, xmm0	 ; 12 - 16      fp:add

	CMPLEP	xmm2, xmm7	 ; 16 - 20      fp:add
	ADDP  xmm1, xmm1	 ; 6  - 10      fp:add
	SUBP  xmm0, xmm3	 ; 19 - 23      fp:add
     if type = Julia
	ADDP  xmm1, dqword[py1]  ; 10 - 14      fp:add
	ADDP  xmm0, dqword[px1]  ; 23 - 27      fp:add
     else if type = Mandel
	ADDP  xmm1, xmm5
	ADDP  xmm0, xmm4
     end if
	MOVMSKP eax, xmm2	 ; 20 - 26      fp
	test eax, eax		  ; 26 - 27      alu0/1
	jz EXIT 		  ; 26 - 27      alu0/1
     if color
	ANDP  xmm2, xmm7	 ; 21 - 23      mmx:alu
	ADDP  xmm6, xmm2	 ; 24 - 28      fp:add
     end if
	sub ecx, 1
	jnz ILOOP
EXIT:
	ConvertAndWrite color
	add edi, ssewidth * 4
	ADDP  xmm4, dqword[dx1]
	sub ebx, ssewidth
	ja XLOOP

	ADDP  xmm5, dqword[dy1]
	add edi, [piw]
	sub edx, 1
	jnz YLOOP

	pop	edi esi ebx
	ret
}

macro SelectTypeAndColor method {
local JULIABW, MANDEL, MANDELBW
	test eax, eax
	jz MANDEL ; Mandel = 0, Julia = 1
	; Julia
	copyscr xmm2, px
	copyscr xmm3, py
	MOVAP dqword[px1], xmm2
	MOVAP dqword[py1], xmm3
	test ebx, ebx
	jnz JULIABW
	method 1, Julia
JULIABW:
	method 0, Julia
MANDEL:
	test ebx, ebx
	jnz MANDELBW
	method 1, Mandel
MANDELBW:
	method 0, Mandel
}

macro SelectMethod {
local VOD
	mov ecx, [useMovAdd]
	mov eax, [type]
	mov ebx, [bw]
	test ecx, ecx
	jz VOD
	SelectTypeAndColor JuliaMandelPaintMovAdd
VOD:
	SelectTypeAndColor JuliaMandelPaintVod
}

proc PaintJuliaMandelSSE c w,h,dx2,dy2,bits,a,LEFT,TOP,px,py,piw,type,bw
	ssewidth = 4 ; calculating four pixels at the same time
	push	ebx esi edi
	mov edx, [h]
	mov esi, [a]
	mov edi, [bits]
	copyscr xmm2, dx2
	copyscr xmm3, dy2
	MOVAPS dqword[dy1], xmm3
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	MOVAPS	xmm0, xmm2	      ; xmm2 = 0       | dx2     | dx2 * 2 | dx2 * 3
	ANDPS	xmm0, dqword[mask1]
	MOVAPS	xmm1, xmm2
	ANDPS	xmm1, dqword[mask2]
	ADDPS	xmm0, xmm1
	ADDPS	xmm0, xmm2
	ANDPS	xmm0, dqword[mask]
	ADDPS	xmm4, xmm0
	MOVAPS	dqword[left1], xmm4
	ADDPS	xmm2, xmm2
	ADDPS	xmm2, xmm2
	MOVAPS	dqword[dx1], xmm2
	MOVAPS	xmm7, dqword[radius]
	SelectMethod
endp

proc PaintJuliaMandelSSE2 c w,h,dx2:qword,dy2:qword,bits,a,LEFT:qword,TOP:qword,px:qword,py:qword,piw,type,bw
	ssewidth = 2 ; two pixels at the same time
	push	ebx esi edi
	mov edx, [h]
	mov esi, [a]
	mov edi, [bits]
	copyscr xmm3, dy2
	XORPD xmm2, xmm2
	MOVAPD dqword[dy1], xmm3
	MOVHPD	xmm2, [dx2]
	copyscr xmm4, LEFT
	copyscr xmm5, TOP
	ADDPD	xmm4, xmm2
	MOVAPD	dqword[left1], xmm4
	ADDPD	xmm2, xmm2
	SHUFPD	xmm2, xmm2, 3
	MOVAPD	dqword[dx1], xmm2
	MOVAPD	xmm7, dqword[radiusd]
	SelectMethod
endp

IsSSE:
	pushfd
	pop eax
	mov edx, eax
	xor eax, 00200000h
	push eax
	popfd
	pushfd
	pop eax
	xor eax, edx
	jz nosse1 ; 386 or 486, can't use CPUID
	xor eax, eax
	inc eax
	cpuid
	xor eax, eax
	test edx, 02000000h
	jz nosse1 ; SSE not supported

	push SEHhandler  ; try to execute an SSE instruction
	push dword[FS:0]
	mov [FS:0], esp
	XORPS xmm0, xmm0
	inc eax
nosse:
	pop dword[FS:0]
	pop edx
nosse1:
	ret

IsSSE2:
	xor eax, eax
	inc eax
	cpuid
	xor eax, eax
	test edx, 04000000h
	jz nosse2 ; SSE2 not supported
	inc eax
nosse2:
	ret

IsMovAdd:
	xor eax, eax
	cpuid		      ; chech if Intel Family 6 => then use Vodnaya
	xor eax, eax
	inc eax
	cmp ebx, 'Genu'
	jne noVod
	cmp edx, 'ineI'
	jne noVod
	cmp ecx, 'ntel'
	jne noVod
	cpuid
	and eax, 0xF00
	sub eax, 0x600
	mov [useMovAdd], eax
noVod:
	ret


virtual at eax
EXCEPTION_RECORD:
  .ExceptionCode dd ?
  .ExceptionFlag dd ?
  .NestedExceptionRecord dd ?
  .ExceptionAddress dd ?
  .NumberParameters dd ?
  .AdditionalData dd ?
end virtual

virtual at eax
CONTEXT:
  .ContextFlags dd ?
  ;DEBUG REGISTERS
  .Dr dd 6 dup ?
  ;FLOATING POINT
  .ControlWord dd ?
  .StatusWord dd ?
  .TagWord dd ?
  .ErrorOffset dd ?
  .ErrorSelector dd ?
  .DataOffset dd ?
  .DataSelector dd ?
  .FPURegs db 80 dup ?
  .Cr0NpxState dd ?
  ;SEGMENT REGISTERS
  .SegGs dd ?
  .SegFs dd ?
  .SegEs dd ?
  .SegDs dd ?
  ;GENERAL-PURPOSE REGISTERS
  .GPRedi dd ?
  .GPResi dd ?
  .GPRebx dd ?
  .GPRedx dd ?
  .GPRecx dd ?
  .GPReax dd ?
  .GPRebp dd ?
  .GPReip dd ?
  .SegCs dd ?
  .GRPflags dd ?
  .GRPesp dd ?
  .SegSS dd ?
end virtual

SEHhandler:
	mov eax, [esp+04] ; get EXCEPTION_RECORD structure address
	cmp [EXCEPTION_RECORD.ExceptionCode], STATUS_ILLEGAL_INSTRUCTION
	jnz nexthandler
	mov eax, [EXCEPTION_RECORD.ExceptionFlag]
	test eax, eax
	jnz nexthandler
	mov eax, [esp+12] ; get CONTEXT structure address
	mov [CONTEXT.GPReip], nosse
	xor eax, eax
	ret
nexthandler: ; allow system handler to show error message
	mov eax, 1
	ret


public PaintJuliaMandelSSE as '_PaintJuliaMandelSSE'
public PaintJuliaMandelSSE2 as '_PaintJuliaMandelSSE2'
public IsSSE as '_IsSSE'
public IsSSE2 as '_IsSSE2'
public IsMovAdd as '_IsMovAdd'

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Share

About the Author

Peter Kankowski
Software Developer
Russian Federation Russian Federation
Peter lives in Siberia, the land of sleeping sun, beautiful mountains, and infinitely deep snow. He recently started a wiki about algorithms and code optimization, where people could share their ideas, learn, and teach others.

| Advertise | Privacy | Mobile
Web02 | 2.8.140916.1 | Last Updated 29 Nov 2005
Article Copyright 2005 by Peter Kankowski
Everything else Copyright © CodeProject, 1999-2014
Terms of Service
Layout: fixed | fluid