Click here to Skip to main content
15,896,726 members
Articles / Programming Languages / C++

find_first_of: A performance pitfall among the STL algorithms

Rate me:
Please Sign up or sign in to vote.
4.93/5 (36 votes)
18 Jun 2007CPOL18 min read 113.3K   247   26  
This article is discussing the performance problems found in the most notable find_first_of implementations and suggests useful improvements and workarounds.
#include "find_first_of_test.h"
#include "dpx_find_first_of.h"

#include <cstring>
#include <ctime>
#include <cassert>

#include <vector>
#include <string>
#include <set>

#include <algorithm>
#include <functional>
#include <iterator>

#include "AssocVector.h"
#include <map>

#ifdef __GNUG__
	#include <ext/hash_set>
	namespace stdext = __gnu_cxx;
#endif

#ifdef _MSC_VER
	#include <hash_set>
#endif


// ----------------------------------------------------------------------------

template<typename _Set>
struct SetMembershipFunctor : public std::unary_function<typename _Set::value_type, bool>
{
public:
	SetMembershipFunctor(_Set *__setPtr) { _m_setPtr = __setPtr; }

	inline bool operator() (const typename _Set::value_type __val) const
					{ return _m_setPtr->find(__val) != _m_setPtr->end(); }

protected:
	_Set *_m_setPtr;
};

// ----------------------------------------------------------------------------

template<typename _ForwardIterator>
struct SortedRangeMembershipFunctor
	: public std::unary_function<typename std::iterator_traits<_ForwardIterator>::value_type, bool>
{
public:
	SortedRangeMembershipFunctor(_ForwardIterator __first, _ForwardIterator __last)
		: _m_first(__first), _m_last(__last) {}

	inline bool operator() (const typename std::iterator_traits<_ForwardIterator>::value_type __val) const
					{ return std::binary_search(_m_first, _m_last, __val); }

protected:
	_ForwardIterator _m_first, _m_last;
};

// ----------------------------------------------------------------------------


void algo_test(algo_type type, unsigned long test_cycles, unsigned long search_size, unsigned long elements_count,
		std::ostream *outStreamPtr)
{
	if (outStreamPtr == NULL)
		outStreamPtr = &std::cout;

	assert(test_cycles > 0);
	assert((elements_count > 0) && (elements_count <= 250));
	assert(search_size > 1);

	if ( (test_cycles > 0) && (elements_count > 0) && (elements_count <= 250) && (search_size > 1) )
	{
		test_string search_range(search_size, 1);
		test_string search_elements(elements_count, -1);

		test_string::iterator it;
		test_string::value_type val = 1; 

		for (it = search_elements.begin(); it != search_elements.end(); ++it)
			*it = ++val;

		std::random_shuffle(search_elements.begin(), search_elements.end());

		search_range[search_range.size() - 1] = val;

		clock_t timer = run_algo(type, search_range, search_elements, test_cycles);

		//------------------------------

		char *algo_name[] = { "unknown", "find_first_of", "strpbrk",
						"find_first_of_set", "alg_find_first_of_hash_set", "alg_find_first_of_assoc_vec", 
						"alg_find_first_of_sorted_range", "alg_dpx_find_first_of" };

		*outStreamPtr << std::endl << std::endl;

		*outStreamPtr << "Algorithm: "		<< algo_name[((unsigned)type < ARRAY_SIZE(algo_name)) ? type : 0] << std::endl;
		*outStreamPtr << "Test cycles: "	<< test_cycles << std::endl;
		*outStreamPtr << "Search size: "	<< (unsigned long) search_range.size() << std::endl;
		*outStreamPtr << "Element count: "	<< (unsigned long) search_elements.size() << std::endl;
		*outStreamPtr << std::endl;

		*outStreamPtr << "Elapsed time: "	<< std::endl;
		*outStreamPtr << "=> " << timer		<< std::endl;
	}
}

// ----------------------------------------------------------------------------

clock_t run_algo(algo_type type, const test_string &search_range, const test_string &search_elements, unsigned long test_cycles)
{
	volatile const test_string::value_type *ptr = NULL;
	test_string::const_iterator it;

	clock_t timer = clock();

	assert( (search_range.size() > 0) && (search_elements.size() > 0) && (test_cycles > 0) );

	if ( (search_range.size() > 0) && (search_elements.size() > 0) && (test_cycles > 0) )
	{
		switch ( type )
		{
			case alg_find_first_of:
			{
				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					it = std::find_first_of(search_range.begin(), search_range.end(),
											search_elements.begin(), search_elements.end());

					assert(it == (search_range.begin() + search_range.size()-1));
				}
			}
			break;

			case alg_dpx_find_first_of:
			{
				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					it = dpx::find_first_of(search_range.begin(), search_range.end(),
											search_elements.begin(), search_elements.end());

					assert(it == (search_range.begin() + search_range.size()-1));
				}
			}
			break;

			case alg_strpbrk:
			{
				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					ptr = strpbrk(search_range.c_str(), search_elements.c_str());
				}

				assert(ptr == (&search_range[0] + search_range.size()-1)); 
			}
			break;

			case alg_find_first_of_set:
			{
				typedef std::set<test_string::value_type> set_of_elements;
				set_of_elements s(search_elements.begin(), search_elements.end());
				SetMembershipFunctor<set_of_elements> memberOfSet(&s);

				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					it = std::find_if(search_range.begin(), search_range.end(), memberOfSet);
				}

				assert(it == (search_range.begin() + search_range.size()-1));
			}
			break;

			case alg_find_first_of_hash_set:
			{
				typedef stdext::hash_set<test_string::value_type> set_of_elements;
				set_of_elements elementsSet(search_elements.begin(), search_elements.end());
				SetMembershipFunctor<set_of_elements> memberOfSet(&elementsSet);

				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					it = std::find_if(search_range.begin(), search_range.end(), memberOfSet);
				}

				assert(it == (search_range.begin() + search_range.size()-1));
			}
			break;

			case alg_find_first_of_assoc_vec:
			{
				typedef Loki::AssocVector<test_string::value_type, bool> set_of_elements;
				//typedef std::map<test_string::value_type, bool> set_of_elements;

				set_of_elements s;

				for ( it = search_elements.begin(); it != search_elements.end(); ++it )
					s[*it] = true;

				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					for ( it = search_range.begin(); it != search_range.end(); ++it )
					{
						if (s.find(*it) != s.end())
							break;
					}
				}

				assert(it == (search_range.begin() + search_range.size()-1));
			}
			break;

			case alg_find_first_of_sorted_range:
			{
				typedef std::vector<test_string::value_type> sorted_range;
				sorted_range elementsRange(search_elements.begin(), search_elements.end());
				std::sort(elementsRange.begin(), elementsRange.end());

				SortedRangeMembershipFunctor<sorted_range::const_iterator>
							memberOfRange(elementsRange.begin(), elementsRange.end());

				
				for ( unsigned long n = 0; n < test_cycles; ++n )
				{
					it = std::find_if(search_range.begin(), search_range.end(), memberOfRange);
				}
				

				assert(it == (search_range.begin() + search_range.size()-1));
			}
			break;

			default:
				break;
		}
	}

	if ( ptr == (&search_range[0] + search_range.size()-1) )
	return clock() - timer;

	if ( it == (search_range.begin() + search_range.size()-1) )
		return clock() - timer;

	return -1;
}

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Software Developer
Greece Greece
I live in Greece with my wife and our two daughters. I am a professional software developer since 1999, using mostly C/C++ in my work.

My main expertise are: C/C++, STL, software optimization, generic programming and debugging. I am also very experienced in client–server programming, communications, concurrent programming, software security and cryptography. Finally, in my early professional years, I have worked a lot on cross-platform programming (Mac+Win).

I am familiar with the MFC, wxWidgets and Cplat GUI frameworks and the Python, Java, Pascal, Fortran, Prolog and Rexx programming languages.

Comments and Discussions