Click here to Skip to main content
Click here to Skip to main content
 
Add your own
alternative version

High performance computing from C++ to MMX

, 30 Jul 2003
Boosting your application performance to the optimum by using hardware acceleration.
// MMXDemoDoc.cpp : implementation of the CMMXDemoDoc class
//

#include "stdafx.h"
#include "MMXDemo.h"

#include "MMXDemoDoc.h"
#include "childfrm.h"

#ifdef _DEBUG
#define new DEBUG_NEW
#undef THIS_FILE
static char THIS_FILE[] = __FILE__;
#endif

/////////////////////////////////////////////////////////////////////////////
// CMMXDemoDoc

// Gives CMMXDemoDoc MFC run-time class support (derived from CDocument)
// so the framework can create instances of it dynamically.
IMPLEMENT_DYNCREATE(CMMXDemoDoc, CDocument)

// Message map: binds each command ID to the CMMXDemoDoc handler of the
// matching name defined further down in this file. The entries sit between
// the AFX_MSG_MAP markers, which are left untouched.
BEGIN_MESSAGE_MAP(CMMXDemoDoc, CDocument)
	//{{AFX_MSG_MAP(CMMXDemoDoc)
	ON_COMMAND(ID_RUNBASIC, OnRunbasic)
	ON_COMMAND(ID_RUNMMX, OnRunmmx)
	ON_COMMAND(ID_RUNOPT, OnRunopt)
	ON_COMMAND(ID_CLEAR, OnClear)
	ON_COMMAND(ID_CHECKSSE, OnChecksse)
	ON_COMMAND(ID_RUNASM, OnRunasm)
	//}}AFX_MSG_MAP
END_MESSAGE_MAP()

/////////////////////////////////////////////////////////////////////////////
// CMMXDemoDoc construction/destruction

// Constructs the document with the default 1024 x 1024 image size and
// builds the image buffers straight away.
CMMXDemoDoc::CMMXDemoDoc()
	: m_iWidth(1024)
	, m_iHeight(1024)
{
	// The buffers are sized from the dimensions initialised above.
	GenerateImage();
}

// Releases the three image buffers owned by the document.
CMMXDemoDoc::~CMMXDemoDoc()
{
	// The buffers are allocated with array new (new BYTE[...]), so they
	// must be released with the array form of delete; a scalar delete on
	// them is undefined behaviour.
	delete [] m_pImg;
	delete [] m_pImg1;
	delete [] m_pImg2;

	// Clear the pointers so any late access fails predictably.
	m_pImg = NULL;
	m_pImg1 = NULL;
	m_pImg2 = NULL;
}

// Initialises a new document and opens two extra CMiniView frames that
// preview the two source images. Both previews are placed in the right-hand
// third of the screen, stacked vertically, each one third of the screen in
// width and height.
//
// Returns FALSE if the base class initialisation fails or if either preview
// frame/view cannot be created; TRUE otherwise.
BOOL CMMXDemoDoc::OnNewDocument()
{
	if (!CDocument::OnNewDocument())
		return FALSE;

	///////////////////////////////////////////////////////////////////
	// This section only prepares 2 additional views for display
	///////////////////////////////////////////////////////////////////

	CRuntimeClass* pRuntimeViewClass = RUNTIME_CLASS(CMiniView);

	// Create view 1 with the current template frame.
	// CreateNewView() returns NULL on failure, so check before use.
	CFrameWnd *pFrame = CreateNewView(pRuntimeViewClass);
	if (pFrame == NULL)
	{
		TRACE0("Warning: could not create the preview frame for source image 1.\n");
		return FALSE;
	}

	CSize size;
	size.cx = GetSystemMetrics(SM_CXSCREEN);
	size.cy = GetSystemMetrics(SM_CYSCREEN);

	// Top of the right-hand third of the screen
	pFrame->SetWindowPos(&CWnd::wndTop, size.cx*2/3, 0, size.cx/3, size.cy/3, SWP_SHOWWINDOW);
	m_pMiniView1 = static_cast<CMiniView *>(pFrame->GetActiveView());
	if (m_pMiniView1 == NULL)
	{
		TRACE0("Warning: preview frame 1 has no active view.\n");
		return FALSE;
	}
	m_pMiniView1->m_szText = "Src Image 1";
	m_pMiniView1->m_pImg = m_pImg1;

	// Create view 2 with the current template frame
	pFrame = CreateNewView(pRuntimeViewClass);
	if (pFrame == NULL)
	{
		TRACE0("Warning: could not create the preview frame for source image 2.\n");
		return FALSE;
	}

	// Directly below the first preview
	pFrame->SetWindowPos(&CWnd::wndTop, size.cx*2/3, size.cy/3, size.cx/3, size.cy/3, SWP_SHOWWINDOW);
	m_pMiniView2 = static_cast<CMiniView *>(pFrame->GetActiveView());
	if (m_pMiniView2 == NULL)
	{
		TRACE0("Warning: preview frame 2 has no active view.\n");
		return FALSE;
	}
	m_pMiniView2->m_szText = "Src Image 2";
	m_pMiniView2->m_pImg = m_pImg2;

	return TRUE;
}



/////////////////////////////////////////////////////////////////////////////
// CMMXDemoDoc serialization

// Archive hook for the document. Nothing is written or read yet: both
// directions are placeholders.
void CMMXDemoDoc::Serialize(CArchive& ar)
{
	const BOOL bLoading = !ar.IsStoring();
	if (bLoading)
	{
		// TODO: add loading code here
		return;
	}

	// TODO: add storing code here
}

/////////////////////////////////////////////////////////////////////////////
// CMMXDemoDoc diagnostics

#ifdef _DEBUG
// Debug-only validity check. The document adds no checks of its own and
// forwards to the CDocument implementation.
void CMMXDemoDoc::AssertValid() const
{
	CDocument::AssertValid();
}

// Debug-only dump. Only the CDocument part is written to the dump context;
// no CMMXDemoDoc members are dumped.
void CMMXDemoDoc::Dump(CDumpContext& dc) const
{
	CDocument::Dump(dc);
}
#endif //_DEBUG

/////////////////////////////////////////////////////////////////////////////
// CMMXDemoDoc commands
// Creates an additional CChildFrame on this document whose view is an
// instance of pViewClass.
//
// pViewClass - runtime class of the view to create inside the new frame.
//
// Returns the new frame window, or NULL if the document has no template,
// the frame object could not be created, or the frame window failed to load.
CFrameWnd *CMMXDemoDoc::CreateNewView(CRuntimeClass *pViewClass)
{
	// Retrieve the document template
	CDocTemplate* pTemplate = GetDocTemplate();
	if (pTemplate == NULL)
	{
		TRACE0("Warning: document has no template; cannot create a new view.\n");
		ASSERT(FALSE);
		return NULL;
	}
	ASSERT_VALID(pTemplate);

	// Find the frame that hosts the document's first view (if any).
	// GetNextView() hands back a CView, not a frame window, so the frame has
	// to be obtained from the view rather than by casting the view pointer.
	CFrameWnd* pCurrentFrame = NULL;
	POSITION pos = GetFirstViewPosition();
	if (pos != NULL)
	{
		CView* pFirstView = GetNextView(pos);
		if (pFirstView != NULL)
			pCurrentFrame = pFirstView->GetParentFrame();
	}

	// Describe the view to be created
	CCreateContext newContext;
	newContext.m_pNewViewClass = pViewClass;
	newContext.m_pNewDocTemplate = pTemplate;
	newContext.m_pLastView = NULL;
	newContext.m_pCurrentFrame = pCurrentFrame;
	newContext.m_pCurrentDoc = this;

	//////////////////////////////////////////////////////
	// Create the frame now
	CRuntimeClass* pFrameClass = RUNTIME_CLASS(CChildFrame);
	if (pFrameClass == NULL)	// validate before the class is used below
	{
		TRACE0("Error: you must override CDocTemplate::CreateNewFrame.\n");
		ASSERT(FALSE);
		return NULL;
	}

	CFrameWnd* pFrame = (CFrameWnd*)pFrameClass->CreateObject();
	if (pFrame == NULL)
	{
		TRACE1("Warning: Dynamic create of frame %hs failed.\n",
			pFrameClass->m_lpszClassName);
		return NULL;
	}
	ASSERT_KINDOF(CFrameWnd, pFrame);

	// create new from resource
	if (!pFrame->LoadFrame(IDR_MAINFRAME,
			WS_OVERLAPPEDWINDOW | FWS_ADDTOTITLE,   // default frame styles
			NULL, &newContext))
	{
		TRACE0("Warning: CDocTemplate couldn't create a frame.\n");
		// frame will be deleted in PostNcDestroy cleanup
		return NULL;
	}

	// it worked !
	//////////////////////////////////////////////////////
	pTemplate->InitialUpdateFrame(pFrame, this);

	// The caller can reach the new view through pFrame->GetActiveView().
	return pFrame;
}

void CMMXDemoDoc::GenerateImage()
{
	int y=0, x=0;

	// generate some pattern for both images. 8 bit per pixel
	m_iSize = m_iWidth * m_iHeight;
	m_pImg = new BYTE[m_iSize];
	m_pImg1 = new BYTE[m_iSize];
	m_pImg2 = new BYTE[m_iSize];

	// Black is 0, white is 255
	memset (m_pImg, 0, m_iSize);		// Set blank image

	// Generate 1st image
	memset (m_pImg1, 0, m_iSize);		// Set vertical line image
	for (y=0; y<m_iHeight; y++)
	{
		for (x=0; x<m_iWidth; x+=10)
		{
			m_pImg1[x+(y*m_iWidth)] = 255;
		}
	}

	// Generate 2nd image, gradient shade of gray image
	for (y=0; y<m_iHeight; y++)
	{
		int iGray = y % 256;
		memset (m_pImg2 + (y*m_iWidth), iGray, m_iWidth);		// everyline has diff shades of gray
	}
}

/////////////////////////////////////////////////////////////////////////////
//
// Comparisons between basic code, optimised code and MMX code
//
/////////////////////////////////////////////////////////////////////////////

// Check the availability of SSE features.
// Returns 1 when the CPUID feature flags report SSE (bit 25 of edx),
// otherwise 0.
// NOTE(review): only the CPU feature bit is examined here; nothing in this
// function checks that the CPUID instruction itself is supported or that the
// operating system enables SSE - confirm whether callers rely on that.
int CheckSSEAvail()
{
	int iAvail = 0;
	
	// The CPU feature flags are bit MMX, FXSR, SSE, SSE2, HTT - 23, 24, 25, 26, 28
	__asm
	{
		mov eax, 1				; select CPUID function 1 (feature information)
		cpuid					; execute cpuid detection, feature flags come back in edx
								; NOTE(review): cpuid also writes ebx and ecx - confirm the compiler preserves them around this block
		shr edx, 0x1a			; shift right by 26: the last bit shifted out (bit 25) ends up in the carry flag

		jnc noCarryFlag			; carry clear means bit 25 was 0, leave iAvail at 0
		mov [iAvail], 1			; carry set means bit 25 was 1, set Avail to 1

		noCarryFlag:
	}

	return iAvail;
}

// Adds image s onto image d in place using unsigned saturated 8-bit
// addition (sums above 255 are clamped to 255).
//
// d    - destination image; receives d + s
// s    - source image, same size as d
// w, h - image width and height in pixels (one byte per pixel)
//
// Whole groups of 8 pixels are added with the MMX register set; the
// remaining (w*h) % 8 pixels are added with plain C++ so every pixel is
// processed. Nothing is done when w*h is zero or negative.
// The MMX loop is deliberately not unrolled; the function is provided as a
// learning tool.
//
// Always returns 1.
int MMXAdd(BYTE *d, BYTE *s, int w, int h)
{
	int iCount = w * h;
	if (iCount <= 0)
		return 1;						// nothing to add

	int iBlocks = iCount >> 3;			// number of complete 8-byte groups
	int iTail = iCount & 7;				// pixels left over after the groups

	// The loop below decrements before it tests, so it must never be
	// entered with a zero counter: the counter would wrap around and the
	// loop would run far past the end of both buffers.
	if (iBlocks > 0)
	{
		// we assume all data in the register is not used by others
		__asm
		{
			// Assign pointers to register
			mov			esi, [s]			; put src addr to esi reg
			mov			edi, [d]			; put dest addr to edi reg
			mov			ecx, [iBlocks]		; put the 8-byte group count to ecx reg

		mmxAddLoop:
			movq		mm0, [esi]			; mov 8 bytes of src data to mmx reg 0
			movq		mm1, [edi]			; mov 8 bytes of dest data to mmx reg 1
			paddusb		mm0, mm1			; add unsigned 8 bit saturated to mmx reg 0
			movq		[edi], mm0			; write the 8 added bytes back to dest memory
			add			esi, 8				; advance src pointer by 8 bytes
			add			edi, 8				; advance dest pointer by 8 bytes

			dec			ecx					; decrement count by 1
			jnz			mmxAddLoop			; loop again while count is not zero
			emms							; restore FPU state to normal
		}
	}

	// Add the pixels that did not fill a complete 8-byte group
	if (iTail > 0)
	{
		BYTE *pDest = d + (iCount - iTail);
		BYTE *pSrc = s + (iCount - iTail);
		for (int i = 0; i < iTail; i++)
		{
			int iSum = pDest[i] + pSrc[i];
			pDest[i] = (BYTE)(iSum > 255 ? 255 : iSum);
		}
	}

	return 1;
}

// A representation in assembly language of how OnRunopt() may look:
// adds image s onto image d in place, one byte at a time, clamping every
// sum above 255 to 255.
//
// d    - destination image; receives d + s
// s    - source image, same size as d
// w, h - image width and height in pixels (one byte per pixel)
//
// Nothing is done when w*h is zero or negative. Always returns 1.
int AsmAdd(BYTE *d, BYTE *s, int w, int h)
{
	int iCount = w * h;

	// The loop decrements before it tests, so a zero counter would wrap
	// around and run far past the end of both buffers.
	if (iCount <= 0)
		return 1;

	// we assume all data in the register is not used by others
	__asm
	{
		// Assign pointers to register
		mov			esi, [s]			; put src addr to esi reg
		mov			edi, [d]			; put dest addr to edi reg
		mov			ecx, [iCount]		; put count to ecx reg

	asmAddLoop:
		mov			al, [edi]			; load a byte of dest data into al
		add			al, [esi]			; add the src byte, carry flag is set when the sum exceeds 255
		jnc			asmAddStore			; no carry, the sum fits in 8 bits
		mov			al, 0xFF			; overflow, saturate to white (255)
	asmAddStore:
		mov			[edi], al			; write the result back to dest
		inc			esi
		inc			edi
		dec			ecx					; decrement count by 1
		jnz			asmAddLoop			; loop again while count is not zero
	}

	return 1;
}

// Baseline implementation, demonstrating common practice: the two source
// images are added pixel by pixel using nested row/column loops and a
// computed array index. Sums above 255 are clamped to 255 and the result is
// written to m_pImg. The work is bracketed by m_el.Begin()/m_el.End() so it
// can be timed, and the views are refreshed afterwards.
// Total operations count for every cycle (inner loop) is more than 10.
void CMMXDemoDoc::OnRunbasic() 
{
	int x=0, y=0, i=0, iGray=0;

	// Start timing
	m_el.Begin();

	// Add the 2 images using indexed memory access
	// Assume both images are the same size
	for (y=0; y<m_iHeight; y++)
	{
		for (x=0; x<m_iWidth; x++)
		{
			// calc index of pixel (x, y) in the row-major buffer
			i = x + y*m_iWidth;
			// add 2 pixels (the sum is held in an int so it can exceed 255)
			iGray = m_pImg1[i] + m_pImg2[i];
			// keep saturation value
			if (iGray > 255) iGray = 255;
			// Save to destination image; the value is already clamped to 0..255
			m_pImg[i] = iGray;
		}
	}

	// this code is an improved version of the above which uses only 1 loop
	/*for (i=0; i<m_iSize; i++)
	{
		// add 2 pixels
		iGray = m_pImg1[i] + m_pImg2[i];
		// keep saturation value
		if (iGray > 255) iGray = 255;
		m_pImg[i] = iGray;
	}*/

	// End Timing
	m_el.End();

	// Force redraw
	UpdateAllViews(0);
}

// MMX operation perform 8 additions in parallel every iteration
void CMMXDemoDoc::OnRunmmx() 
{
	// Begin timing
	m_el.Begin();

	// Copy from img1 to tmp
	memcpy(m_pImg, m_pImg1, m_iSize);
	// add img2 to tmp
	MMXAdd(m_pImg, m_pImg2, m_iWidth, m_iHeight);
	
	// End Timing
	m_el.End();

	// Force redraw
	UpdateAllViews(0);
}

// Optimised C++ implementation: the same saturated addition as the basic
// version, but using pointer arithmetic instead of a computed index.
// Image 1 is copied into the result buffer, then image 2 is added onto it
// byte by byte. Pointer access must be sequential.
void CMMXDemoDoc::OnRunopt() 
{
	// Begin timing
	m_el.Begin();

	// Precalculate the pointers
	BYTE *pSrc = m_pImg2;					// current byte of image 2
	BYTE *pSrcEnd = m_pImg2 + m_iSize;		// one past the last byte of image 2
	BYTE *pDest = m_pImg;					// current byte of the result image
	int iGray;

	// Copy from img1 to the result buffer
	memcpy(m_pImg, m_pImg1, m_iSize);
	
	// loop over each pixel and add
	while (pSrc < pSrcEnd)
	{
		// the sum is held in an int so it can exceed 255 before clamping
		iGray = *pDest + *pSrc;
		if (iGray > 255) iGray=255;
		*pDest = iGray;
		pSrc++;
		pDest++;
	}

	// End Timing
	m_el.End();

	// Force redraw
	UpdateAllViews(0);
}

void CMMXDemoDoc::OnClear() 
{
	// Clear data
	memset(m_pImg, 0, m_iSize);

	// Force redraw
	UpdateAllViews(0);
}

// Reports in a message box whether CheckSSEAvail() found SSE support.
// Check out Part 2 of the articles if your system supports SSE.
void CMMXDemoDoc::OnChecksse() 
{
	// Pick the message from the detection result and show it
	AfxMessageBox(CheckSSEAvail() ? "SSE Present" : "SSE is not available");
}

void CMMXDemoDoc::OnRunasm() 
{
	// Begin timing
	m_el.Begin();

	// Copy from img1 to tmp
	memcpy(m_pImg, m_pImg1, m_iSize);
	// add img2 to tmp
	AsmAdd(m_pImg, m_pImg2, m_iWidth, m_iHeight);
	
	// End Timing
	m_el.End();

	// Force redraw
	UpdateAllViews(0);
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here

Share

About the Author

Vincent Leong77
Web Developer Cortex Imaging
Malaysia Malaysia
Interest in computer vision, biometrics, image processing & software optimizations. Known language C/C++, VC++ MFC, Win32, COM/ATL, Assembly.

| Advertise | Privacy | Terms of Use | Mobile
Web03 | 2.8.141216.1 | Last Updated 31 Jul 2003
Article Copyright 2003 by Vincent Leong77
Everything else Copyright © CodeProject, 1999-2014
Layout: fixed | fluid