Click here to Skip to main content
15,884,298 members
Articles / Desktop Programming / Win32

Concurrency Runtime in Visual C++ 2010

Rate me:
Please Sign up or sign in to vote.
4.98/5 (60 votes)
11 Nov 2010CPOL49 min read 147.2K   2.2K   164  
Learn about parallel algorithms, parallel containers, tasks, task groups, agents library, task scheduler etc in VC10
// Copyright (C) 2010
// Ajay Vijayvargiya

// This source code was published on CodeProject.com, under CPOL license
// This code CANNOT be used for similar publication on the web or as printed material.
// This can, however, be used as educational reference in educational institutions.
// You are allowed to use the accompanying code in your programs, 
//    commercial or non commercial. 

// This source code is only intended to explicate the Concurrency Runtime in Visual C++ 2010,
// and thus may contain bugs, some logical issues etc.
// The explanation is relevant to Visual C++ 2010 (Compiler version: 16.0)

// http://www.codeproject.com/KB/cpp/parallelcpp.aspx


// For performance benchmarking, simple approach, GetTickCount is used.
// If you prefer, you may use other high performance counters.
// Ignore the importance and non-optimized code, they are just for illustration.


#include "stdafx.h"
#include "examples.h"

#include <ppl.h>
#include <vector>
#include <algorithm>
#include <numeric>

#include "base.h"


using namespace std;
using namespace Concurrency;





/***************************************************************************/
/* Function: ParallelSum                                                   */
/***************************************************************************/
/* Demonstrates parallel_for algorithm  and the combinable class           */
/* It should be noted that each iteration should do something specific,    */
/* and not trivial thing like adding the number only. When each iteration. */
/* performs something, it gives performance benefits over serial execution */
/* That's why I sum only number divisible by some number. Had it been just */
/* summation of all numbers, serial version would perform faster, since    */
/* combinable::local function would use more of CPU than actual sum.       */
/***************************************************************************/

#define DIVISIBLE_BY	7

void ParallelSum()
{
	unsigned __int64 nSumTill;
	unsigned __int64 nSum;

	combinable<ULONGLONG> Sum;

	// This object is just placed to demonstrate more of combinable class.
	combinable<vector<ULONGLONG>> VectorOfDivisibles;

	vector<ULONGLONG> Dummy;

	DWORD nTickStart, nTickEnd;
	
	wcout << "Enter the number to sum from 1 to 'number' :";
	wcin >> nSumTill;


	wcout << "\nAccumulating serially (check that only 1 CPU-core is utilized)...";
	nTickStart = GetTickCount();

	// Execute serially
	nSum = 0;
	for ( ULONGLONG nValue = 1; nValue <= nSumTill; ++nValue)
	{
		if ( (nValue % DIVISIBLE_BY) == 0)
		{
			nSum += nValue;
			Dummy.push_back(nValue);	// Put just to make sure both loops perform same work.
		}
	}

	nTickEnd = GetTickCount();

	wcout << "\nThe sum is: " << nSum << 
		"\nWhich took " << (nTickEnd - nTickStart) << "ms.";

	wcout << "\nTotal numbers: " <<  Dummy.size();


	wcout << "\nAccumulating parallelly (Multiple cores are being utilized)...";
	nTickStart = GetTickCount();

	// Execute parallelly	
	parallel_for( (ULONGLONG)1, nSumTill+1, [&](ULONGLONG nValue)
	{
		if ( (nValue % DIVISIBLE_BY) == 0)
		{
			Sum.local() += nValue;

			// Add this number to 'this' thread-specific vector.
			VectorOfDivisibles.local().push_back(nValue);
		}		
	});

	int nNumberCount = 0 ;
	VectorOfDivisibles.combine_each([&nNumberCount](const vector<ULONGLONG>& long_vector)
	{
		nNumberCount += long_vector.size();
	});

	nTickEnd = GetTickCount();

	wcout << "\nThe sum is: " << Sum.combine(plus<ULONGLONG>())<< 
		"\nWhich took " << (nTickEnd - nTickStart) << "ms.";

	

	wcout << "\nTotal numbers: " <<  nNumberCount;
}








/****************************************************************************/
/* Function: ParallelPrimeFind                                              */
/****************************************************************************/
/* Demonstrates parallel_for_each algorithm  and the combinable class       */
/* This function first generates few numbers (1 to ELEMENT_COUNT_FOR_PRIME) */
/* Then it shuffles the numbers.                                            */
/* For performance benchmarking of serial and parallel executions, it finds */
/* "count" of prime numbers in this range. Timing of both computation is    */
/* rendered to console.														*/
/****************************************************************************/

// 500 thousand / 5 lacs
#define ELEMENT_COUNT_FOR_PRIME		500000

void ParallelPrimeFind()
{
	vector<int> Numbers;
	combinable<int> Primes;
	int nPrimes;

	DWORD nTickStart, nTickEnd;

	// Allocate vector
	Numbers.resize(ELEMENT_COUNT_FOR_PRIME);

	// Generate incrementing numbers
	int nCounter = 1;
	generate(Numbers.begin(), Numbers.end(), [&nCounter] 
	{
		return nCounter++;
	});

	// Shuffle the vector
	random_shuffle(Numbers.begin(), Numbers.end());


	wcout << "Finding primes serially...";

	// Find the prime numbers (count), serially.
	// The lambda counts them.
	nTickStart = GetTickCount();

	nPrimes = 0;
	for_each(Numbers.begin(), Numbers.end(), [&nPrimes](int nNumber)
	{
		if(IsPrimeNumber(nNumber))
			nPrimes++;
	});

	nTickEnd = GetTickCount();

	wcout << "\nPrimes found: " << nPrimes <<
		"\nWhich took " << (nTickEnd - nTickStart) << "ms.";




	wcout << "\nFinding primes parallelly...";

	// Find the prime numbers (count), serially.
	// The lambda counts them.
	nTickStart = GetTickCount();

	
	parallel_for_each(Numbers.begin(), Numbers.end(), [&Primes](int nNumber)
	{
		if(IsPrimeNumber(nNumber))
		{
			Primes.local()++;
		}
	});

	nTickEnd = GetTickCount();

	nPrimes = Primes.combine(plus<int>());

	wcout << "\nPrimes found: " << nPrimes <<
		"\nWhich took " << (nTickEnd - nTickStart) << "ms.";
}






/*****************************************************************************/
/* Function: ParallelInvokeExample                                          **/
/*****************************************************************************/
/* It runs three tasks/function in serial and then parallel, and displays the*/
/* time taken.																 */
/* The first two routines, finds the count of even/odd number, and puts those*/
/* number in string format in a vector. Converting and inserting into vector */
/* is just to make sure that each function "does something" CPU intensive.   */
/* Since, I did not use sleep, UI, file-IO or other external factors, doing  */
/* was only choice.															 */
/* This third routine does the same with prime numbers.						 */
/*****************************************************************************/


// 5 million / 50 lacs
#define ELEMENT_COUNT_PARALLEL_INVOKE	5000000

void ParallelInvokeExample()
{
	ULONGLONG nEvenSum, nOddSum, nPrimeSum;
	
	vector<string> Evens, Primes, Odds;
	DWORD nTickStart, nTickEnd;

	// Let's define few lambdas locally and store them into 'auto' variables
	
	//.....//
	auto EvenAccumulator = [&nEvenSum, &Evens]
	{
		nEvenSum = 0;
		char sBuffer[64];	// Just a placeholder		

		// Non optimized way to sum...
		for (int nValue = 1; nValue <= ELEMENT_COUNT_PARALLEL_INVOKE; ++nValue)
		{
			if ( nValue%2 == 0 )
			{
				nEvenSum += nValue;
									  
				sprintf_s(sBuffer, "%d", nValue);				
				Evens.push_back(sBuffer);			
			}
		}
	};

	auto OddAccumulator = [&nOddSum, &Odds]
	{
		nOddSum = 0;
		char sBuffer[64];	// Just a placeholder

		// Non optimized way to sum...
		for (int nValue = 1; nValue <= ELEMENT_COUNT_PARALLEL_INVOKE; ++nValue)
		{
			if ( nValue%2 != 0 )
			{
				nOddSum += nValue;

				sprintf_s(sBuffer, "%d", nValue);				
				Odds.push_back(sBuffer);
			}
		}		
	};

	auto PrimesCounter= [&Primes, &nPrimeSum]
	{	
		char sBuffer[64];	// Just a placeholder

		// Non optimized way to sum...
		for (int nValue = 1; nValue <= ELEMENT_COUNT_FOR_PRIME; ++nValue)
		{
			if( IsPrimeNumber(nValue) )
			{
				nPrimeSum += nValue;

				sprintf_s(sBuffer, "%d", nValue);				
				Primes.push_back(sBuffer);
			}
		}		
	};
	//.....//



	// Do it serially...
	wcout << "\nExecuting serially...";
	nTickStart = GetTickCount();

	EvenAccumulator();
	OddAccumulator();
	PrimesCounter();

	nTickEnd = GetTickCount();


	wcout << "\nSerial execution took :" << (nTickEnd - nTickStart) << "ms" 
		"\nEven sum: " << nEvenSum <<
		"\nOdd sum: " << nOddSum<<
		"\nPrimes count: " << Primes.size();


	Primes.clear();
	Odds.clear();
	Evens.clear();



	// Do it parallelly 
	wcout << "\n\nExecuting parallelly...";
	nTickStart = GetTickCount();

	parallel_invoke(
		EvenAccumulator,
		OddAccumulator,
		PrimesCounter);

	nTickEnd = GetTickCount();


	wcout << "\nParallel execution took :" << (nTickEnd - nTickStart) << "ms" 
		"\nEven sum: " << nEvenSum <<
		"\nOdd sum: " << nOddSum<<
		"\nPrimes count: " << Primes.size();
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer (Senior)
India India
Started programming with GwBasic back in 1996 (Those lovely days!). Found the hidden talent!

Touched COBOL and Quick Basic for a while.

Finally learned C and C++ entirely on my own, and fell in love with C++, still in love! Began with Turbo C 2.0/3.0, then to VC6 for 4 years! Finally on VC2008/2010.

I enjoy programming, mostly the system programming, but the UI is always on top of MFC! Quite experienced on other environments and platforms, but I prefer Visual C++. Zeal to learn, and to share!

Comments and Discussions