Click here to Skip to main content
15,885,767 members
Articles / Programming Languages / C++

Scaling of memory intensive multi-threaded applications to SMMP computers

Rate me:
Please Sign up or sign in to vote.
4.92/5 (74 votes)
2 Jan 2004   22 min read   283K   2K   99  
This article discusses the impact of the multithreaded run-time library's memory manager on scaling memory-intensive server applications to Shared Memory Multiprocessor computers.
  • rtl_scaling_sources.zip
    • rtl_scaling_sources
      • processes1cpu_results
        • test.2.1.1.log
        • test.2.2.1.log
        • test.2.2.2.log
        • test.2.3.1.log
        • test.2.3.2.log
        • test.2.3.3.log
        • test.2.4.1.log
        • test.2.4.2.log
        • test.2.4.3.log
        • test.2.4.4.log
        • test.2.6.1.log
        • test.2.6.2.log
        • test.2.6.3.log
        • test.2.6.4.log
        • test.2.6.5.log
        • test.2.6.6.log
      • processes2cpu_results
        • test.1.1.1.log
        • test.1.2.1.log
        • test.1.2.2.log
        • test.1.3.1.log
        • test.1.3.2.log
        • test.1.3.3.log
        • test.1.4.1.log
        • test.1.4.2.log
        • test.1.4.3.log
        • test.1.4.4.log
        • test.1.6.1.log
        • test.1.6.2.log
        • test.1.6.3.log
        • test.1.6.4.log
        • test.1.6.5.log
        • test.1.6.6.log
        • test.2.1.1.log
        • test.2.2.1.log
        • test.2.2.2.log
        • test.2.3.1.log
        • test.2.3.2.log
        • test.2.3.3.log
        • test.2.4.1.log
        • test.2.4.2.log
        • test.2.4.3.log
        • test.2.4.4.log
        • test.2.6.1.log
        • test.2.6.2.log
        • test.2.6.3.log
        • test.2.6.4.log
        • test.2.6.5.log
        • test.2.6.6.log
        • test.3.1.1.log
        • test.3.2.1.log
        • test.3.2.2.log
        • test.3.3.1.log
        • test.3.3.2.log
        • test.3.3.3.log
        • test.3.4.1.log
        • test.3.4.2.log
        • test.3.4.3.log
        • test.3.4.4.log
        • test.3.6.1.log
        • test.3.6.2.log
        • test.3.6.3.log
        • test.3.6.4.log
        • test.3.6.5.log
        • test.3.6.6.log
        • test.4.1.1.log
        • test.4.2.1.log
        • test.4.2.2.log
        • test.4.3.1.log
        • test.4.3.2.log
        • test.4.3.3.log
        • test.4.4.1.log
        • test.4.4.2.log
        • test.4.4.3.log
        • test.4.4.4.log
        • test.4.6.1.log
        • test.4.6.2.log
        • test.4.6.3.log
        • test.4.6.4.log
        • test.4.6.5.log
        • test.4.6.6.log
      • run_processes.bat
      • run_processes_impl.bat
      • run_threads.bat
      • run_threads_impl.bat
      • sources
      • TestSMP2.exe
      • threads1cpu_results
      • threads2cpu_results
// TestSMP2.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"

#include <process.h>
#include <string>
#include <vector>
#include <list>
#include <windows.h>
#include <math.h>

#include "QAFDebug.h"

/// Parameters handed to a worker thread through the void* argument of
/// WorkerThreadProcess().
struct CWorkerThreadParam 
{
	/// 1-based index of the worker thread: StartWorkerThread() stores the
	/// thread's slot in the threads array plus one. Used in status output.
	DWORD dwIndex;
	
	/// Count of iterations the worker has to run. The worker may overwrite
	/// this field with its accumulated sum just before it finishes.
	DWORD dwIter;	
};

/// Test ID enumeration; the values are the numbers accepted by the -test
/// command line switch (see Usage()).
enum TESTS
{
	TEST_CPU        = 1, ///< Computational test
	TEST_MEM_ALLOC  = 2, ///< Memory allocation test
	TEST_MEM_MIXED  = 3, ///< Memory allocation and access test
	TEST_MEM_ACCESS = 4, ///< Memory access test
	TEST_COUNT      = 4  ///< Count of tests; equals the highest test ID and is the upper bound when -test is validated
};

/// Count of worker threads requested on the command line (-threads)
unsigned int nThreadCount = 0;

/// Count of iterations each worker thread runs, from the command line (-iter)
unsigned int nIterCount = 0;

/// Test ID from the command line (-test); one of the TESTS values, see Usage()
unsigned int nTest = 0;

/// Loop count multiplier from the command line (-factor); it scales the
/// inner loop bounds of every test, so larger values make a test run longer
unsigned int nFactor = 0;

/// Maximum allowed count of threads
const DWORD THREAD_COUNT = 10;

/// Worker thread handles, one slot per possible thread
std::vector<HANDLE> threads( THREAD_COUNT );

/// Worker thread parameters; slot i belongs to the thread whose handle is threads[i]
CWorkerThreadParam params0[THREAD_COUNT];

/// Number of elements in each test list
const int MEM_TEST_ARRAY_SIZE = 300 * 1000;

/// Global list used by the memory access test; main() fills it before the
/// worker threads start, and the workers iterate over it
std::list<char> MemTestArray;

/// print the status of the thread
void PrintThreadStatus( DWORD nThreadIndex, std::string &szStatus, DWORD dwDiffMSec )
{
	Q_ASSERT( (nThreadIndex > 0) && (nThreadIndex <= nThreadCount) );
	Q_ASSERT( !szStatus.empty() );
	if( 0 == dwDiffMSec )
		printf( "Thread %2d %s.\n", nThreadIndex, szStatus.c_str() );
	else
		printf( "Thread %2d %s in %u msec.\n", nThreadIndex, szStatus.c_str(), dwDiffMSec );
}

/// Worker thread entry point, started through _beginthreadex().
///
/// Runs the test selected by the global nTest for the number of iterations
/// stored in the thread parameters. A status line is printed when the thread
/// starts, before and after every iteration, and when the thread finishes.
///
/// @param param  Pointer to this thread's CWorkerThreadParam. A NULL pointer
///               makes the thread return immediately.
/// @return Always 0.
unsigned int __stdcall WorkerThreadProcess( void *param )
{
	if( NULL == param )
		return 0;

	CWorkerThreadParam * pThreadParam = static_cast<CWorkerThreadParam *>( param );

	// The status words are named objects because a temporary cannot be bound
	// to a non-const reference parameter in standard C++.
	std::string szInitialized( "initialized" );
	std::string szStarted( "started" );
	std::string szCompleted( "completed" );
	std::string szFinalized( "finalized" );

	// Report on thread start
	const DWORD dwStartThread = GetTickCount();
	PrintThreadStatus( pThreadParam->dwIndex, szInitialized, 0 );

	float fSum = 0;
	const DWORD dwIterCount = pThreadParam->dwIter;
	for( DWORD dwPass = 0; dwPass < dwIterCount; ++dwPass )
	{
		// Report on iteration start
		const DWORD dwStart = GetTickCount();
		PrintThreadStatus( pThreadParam->dwIndex, szStarted, 0 );

		switch( nTest )
		{
			case TEST_CPU:
			{
				// Pure arithmetic; no memory is allocated inside the loop.
				const unsigned int nLoops = nFactor * 10 * 1000 * 1000;
				for( unsigned int j = 0; j < nLoops; ++j )
				{
					int k = static_cast<int>( j * 123 );
					k = (k + 1) / 11;
					k = static_cast<int>( j ) - (k / 3);
					fSum += static_cast<float>( k );
					float f1 = sin( static_cast<float>( k ) );
					float f2 = acos( f1 );
					fSum += f2;
				}
				break;
			}
			case TEST_MEM_ALLOC:
			{
				// Constructing and destroying the list is the whole test:
				// every element is a separate allocation.
				const unsigned int nLoops = nFactor * 10;
				for( unsigned int n = 0; n < nLoops; ++n )
				{
					std::list<char> localList( MEM_TEST_ARRAY_SIZE );
				}
				break;
			}
			case TEST_MEM_MIXED:
			{
				// Same allocations as TEST_MEM_ALLOC, plus one write to every element.
				const unsigned int nLoops = nFactor * 10;
				for( unsigned int n = 0; n < nLoops; ++n )
				{
					std::list<char> localList( MEM_TEST_ARRAY_SIZE );
					std::list<char>::iterator it = localList.begin();
					std::list<char>::iterator itEnd = localList.end();
					for( ; it != itEnd; ++it )
					{
						*it = 123;
					}
				}
				break;
			}
			case TEST_MEM_ACCESS:
			{
				// NOTE(review): the iterator is initialised once, outside the
				// outer loop, so only the first outer pass walks the global
				// list; every later pass finds it == itEnd and does nothing.
				// Left as is on purpose: resetting the iterator per pass would
				// multiply the run time by the list length, so the loop count
				// has to be re-chosen together with that fix.
				// NOTE(review): all worker threads write to the same global
				// list without synchronisation - presumably intended by the
				// benchmark; confirm.
				std::list<char>::iterator it = MemTestArray.begin();
				std::list<char>::iterator itEnd = MemTestArray.end();
				const unsigned int nLoops = nFactor * 600 * 1000 * 1000;
				for( unsigned int n = 0; n < nLoops; ++n )
				{
					for( ; it != itEnd; ++it )
					{
						*it = *it + 1;
						fSum += *it;
					}
				}
				break;
			}
		}

		// Report about the iteration completion
		PrintThreadStatus( pThreadParam->dwIndex, szCompleted, GetTickCount() - dwStart );
	}

	// Store the sum where it is visible outside the thread - presumably so
	// that the optimizer cannot discard the computations above.
	if( (fSum > 0) && (fSum < 10) )
		pThreadParam->dwIter = static_cast<DWORD>( fSum );

	// Report about the thread completion
	PrintThreadStatus( pThreadParam->dwIndex, szFinalized, GetTickCount() - dwStartThread );

	return 0;
}

/// Launch the worker thread that occupies slot i.
///
/// @param i     0-based slot in the threads / params0 arrays; must be below THREAD_COUNT.
/// @param iter  Count of iterations the worker has to run.
/// @return true when the thread was created, false when the slot is out of
///         range or the thread could not be created.
bool StartWorkerThread( DWORD i, DWORD iter )
{
	if( i < THREAD_COUNT )
	{
		// Fill in the parameters before the thread exists, it reads them at once.
		CWorkerThreadParam & slotParam = params0[i];
		slotParam.dwIndex = i + 1;
		slotParam.dwIter = iter;

		threads[i] = reinterpret_cast<HANDLE>(
			_beginthreadex( NULL, 0, WorkerThreadProcess, &slotParam, 0, NULL ) );
		return NULL != threads[i];
	}

	return false;
}

/// Look up the value that follows a named switch on the command line.
///
/// @param argc         Argument count, as passed to main().
/// @param argv         Argument vector, as passed to main().
/// @param szParamName  Switch to search for, e.g. "-threads". It is compared
///                     with strcmp(), so the match is exact and case-sensitive.
///                     The string is only read, never modified.
/// @param szDefault    Value returned when the switch is not found or the
///                     arguments are invalid.
/// @return The argument that directly follows the first occurrence of
///         szParamName, otherwise szDefault.
char * GetCmdLineArg( int argc, char * argv[], const char * szParamName, char * szDefault )
{
	if( Q_INVALID( argc <= 0 ) || Q_INVALID( NULL == argv ) )
		return szDefault;
	if( Q_INVALID( NULL == szParamName ) || Q_INVALID( 0 == szParamName[0] ) )
		return szDefault;
	// Stop one short of the end: a switch in the last position has no value after it.
	for( int i = 1; i < (argc - 1); ++i )
	{
		if( Q_ASSERT( NULL != argv[i] ) && (0 == strcmp( szParamName, argv[i] )) )
		{
			// The value is the argument right after the switch.
			++i;
			Q_ASSERT( NULL != argv[i] );
			return argv[i];
		}
	}
	return szDefault;
}

/// Check whether a switch is present on the command line.
///
/// @param argc         Argument count, as passed to main().
/// @param argv         Argument vector, as passed to main().
/// @param szParamName  Switch to search for, e.g. "-nopause". It is compared
///                     with strcmp(), so the match is exact and case-sensitive.
///                     The string is only read, never modified.
/// @return true when any argument after the program name equals szParamName;
///         false otherwise, including when the arguments are invalid.
bool IsCmdLineArg( int argc, char * argv[], const char * szParamName )
{
	if( Q_INVALID( argc <= 0 ) || Q_INVALID( NULL == argv ) )
		return false;
	if( Q_INVALID( NULL == szParamName ) || Q_INVALID( 0 == szParamName[0] ) )
		return false;
	// argv[0] is skipped, the search starts at the first real argument.
	for( int i = 1; i < argc; ++i )
	{
		if( Q_ASSERT( NULL != argv[i] ) && (0 == strcmp( szParamName, argv[i] )) )
			return true;
	}
	return false;
}

/// Print the command line help to stdout.
///
/// Every switch is optional; the defaults shown are the ones main() passes
/// to GetCmdLineArg().
void Usage()
{
	printf( "Usage: TestSMP2.exe [-test <T>] [-threads <N>] [-iter <I>] [-factor <F>] [-nopause]\n" );
	printf( "       -test <T>     Test ID (default 1)\n" );
	printf( "              1      Computational test\n" );
	printf( "              2      Memory allocation test\n" );
	printf( "              3      Memory allocation and access test\n" );
	printf( "              4      Memory access test\n" );
	printf( "       -threads <N>  Count of parallel threads (from 1 to 10, default 1).\n" );
	printf( "       -iter <I>     Count of iterations for each thread (I >= 1, default 1).\n" );
	printf( "       -factor <F>   Loop count multiplier, larger values make the test run longer (F >= 1, default 1).\n" );
	printf( "       -nopause      If specified, the program will not pause before exiting.\n" );
	printf( "Example:\n" );
	printf( "       TestSMP2.exe -test 2 -threads 2 -iter 10 -factor 1 -nopause\n" );
}

/// the main body
int main( int argc, char* argv[] )
{
	nTest = atoi( GetCmdLineArg( argc, argv, "-test", "1" ) );
	nThreadCount = atoi( GetCmdLineArg( argc, argv, "-threads", "1" ) );
	nIterCount = atoi( GetCmdLineArg( argc, argv, "-iter", "1" ) );
	nFactor = atoi( GetCmdLineArg( argc, argv, "-factor", "1" ) );
	bool bPause = !IsCmdLineArg( argc, argv, "-nopause" );
	
	if( (nThreadCount < 1) || (nThreadCount > 10) || (nIterCount < 1) || (nTest <= 0) || (nTest > TEST_COUNT) )
	{
		Usage();
		return 1;
	}
	
	if( TEST_MEM_ACCESS == nTest )
	{
		printf( "Preparing global data structures...\n\n" );
		MemTestArray.resize( MEM_TEST_ARRAY_SIZE );
	}

	printf( "Testing started...\n\n" );
	
	try
	{
		printf( "\nTest #%d, threads = %d, iterations = %d, factor = %d...\n\n", 
			nTest, nThreadCount, nIterCount, nFactor );

		DWORD dwStart = GetTickCount(); 
		int c = 0;
		// Start all threads
		for( int i = 0; i < nThreadCount; i++ )
		{
			StartWorkerThread( i, nIterCount );
			c++;
		}
		// wait for all threads to finish
		DWORD dwRet = WaitForMultipleObjects( nThreadCount, &threads[0], TRUE, INFINITE );
		if( dwRet >= (WAIT_OBJECT_0 + nThreadCount) )
		{
			Q_CHECK( WAIT_OBJECT_0, dwRet );
			printf( "\nTest 1%d failed!\n\n", nTest );
			return 0;
		}
		for( i = 0; i < nThreadCount; i++ )
			CloseHandle( threads[i] );
		
		printf( "\nTest #%d completed in %d msec!\n\n", nTest, GetTickCount() - dwStart );
	}
	catch( char * sz )
	{
		printf( "\nException: %s!\n", sz );
	}
	catch( ... )
	{
		printf( "\nUnhandled exception!\n" );
	}
	
	if( bPause )
	{
		printf( "\nPress Enter to quit...\n" );
		getchar();
	}

	if( TEST_MEM_ACCESS == nTest )
	{
		printf( "Destructing global data structures...\n\n" );
		MemTestArray.clear();
	}
	
	return 0;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article has no explicit license attached to it but may contain usage terms in the article text or the download files themselves. If in doubt please contact the author via the discussion board below.

A list of licenses authors might use can be found here


Written By
Team Leader OpTier
Israel
Programming computers since entering the university in 1992, but dreaming of programming for a long time before getting my hands on my first computer.

Experienced in cross-platform software development using C++ and Java, as well as rapid GUI development using Delphi/C#. Strong background in networking, relational databases, Web development, and mobile platforms.

I like playing guitar, visiting historical sites (not on the Internet, by car :) ) and cooking meat with friends (sorry, vegetarians). Look for more information on www.schetinin.com

Comments and Discussions