Fast Automatic Differentiation in C++

Eugene Shatokhin

Rate me:

4.67/5 (12 votes)

1 Sep 2006CPOL12 min read

76.2K

2.9K

An article on fast automatic differentiation (FAD), another way of computing the derivatives of a function.

fadcpp_demo.zip
- FADcpp_examples.sln
- gradient
  - gradient.cpp
  - gradient.vcproj
  - stdafx.cpp
  - stdafx.h
- iter
  - iter.cpp
  - iter.vcproj
  - stdafx.cpp
  - stdafx.h
- iter_f
  - iter_f.cpp
  - iter_f.vcproj
  - stdafx.cpp
  - stdafx.h
- reed
  - adouble.cpp
  - adouble.h
  - adouble_f.h
  - asection.cpp
  - asection.h
  - doc
    - reed library v01.chm
  - lib
  - mem_pool.h
  - node.h
  - util.cpp
  - util.h
- reed_lib
  - reed_lib.vcproj
  - stdafx.cpp
  - stdafx.h
- simple
  - simple.cpp
  - simple.vcproj
  - stdafx.cpp
  - stdafx.h
fadcpp_src.zip
- adouble.cpp
- adouble.h
- adouble_f.h
- asection.cpp
- asection.h
- reed library v01.chm
- mem_pool.h
- node.h
- util.cpp
- util.h

//////////////////////////////////////////////////////////////////////////
/// The Reed Library 
/// Copyright (c) 2006 Yevgeniy Shatokhin 

/// File: mem_pool.h 
/// Contains an implementation of an single threaded allocator for small (4-128 
/// bytes or so) fized-size objects that have 'next' field. 
/// 
//////////////////////////////////////////////////////////////////////////

/// 
/// Last update: August 29, 2006

#ifndef __MEM_POOL_H_198300CE_37CE_476E_A5E7_6A700185B361_INCLUDED_
#define __MEM_POOL_H_198300CE_37CE_476E_A5E7_6A700185B361_INCLUDED_

#include <cassert>
#include <list>

#ifndef REED_DEFAULT_EXPANSION_SIZE		// in elements rather than bytes
#define REED_DEFAULT_EXPANSION_SIZE 2048 ///< Plan for this number of elements.
#endif

#pragma warning(push)	// store current state for each warning
#pragma warning(disable: 4311 4312)

namespace reed
{
////////////////////////////////////////////////////////////////////////////////
/// class CMemoryPool \n
/// Offers services for allocating fixed-sized objects one at a time. Memory is
/// only deallocated as a whole but can be reused.\n
/// The stored object must be a POD structure or it can have some methods (except 
/// nontrivial ctor/dtor - they won't be called). It can have vftable as well. \n
/// CMemoryPool instances must not be shared between different threads. 
/// Use it for single threaded memory pooling only.
////////////////////////////////////////////////////////////////////////////////
	template <class T>
	class CMemoryPool
	{
	public:
		//////////////////////////////////////////////////////////////////////////
		// type definitions
		typedef T* pointer;
		typedef unsigned char byte_t;

	private:
		//////////////////////////////////////////////////////////////////////////
		/// struct Chunk \n
		/// Manages a continuous memory block.
		//////////////////////////////////////////////////////////////////////////
		struct Chunk
		{
			/// 
			/// Raw (may be unaligned) ptr to the 1st block int the memory chunk.
			byte_t* pData;

			/// Aligned (at ALIGN_BYTES boundary) ptr to the 1st block int the memory 
			/// chunk.
			byte_t* pFirst;	

			/// Ptr to the last block int the memory chunk (not to the area behind it).
			/// pLast = (byte_t* )((unsigned int)pFirst + size_of_this_chunk - sizeof(T))
			byte_t* pLast;	

			/// 
			/// The block should be large enough to contain at least a pointer.
			enum {BLOCK_SIZE = (sizeof(T) > sizeof(pointer)) ? sizeof(T) : sizeof(pointer)};

			///	A boundary to align chunk start addresses at (in bytes). Must be 
			/// 2^t, t = 1, 2, 3, ...
			enum {ALIGN_BYTES = 32};

			//////////////////////////////////////////////////////////////////////////
			// methods

			/// Clear an already allocated chunk. Its memory is not released though. \n
			/// pNext will be written as the 'next' pointer in the last block of the
			/// chunk.
			inline void reset(pointer pNext = NULL);

			/// 
			/// Initialize the chunk object to hold 'EXPANSION_SIZE' blocks.
			void init()
			{
				assert(EXPANSION_SIZE > 0);

				// Overflow check
				assert((BLOCK_SIZE * EXPANSION_SIZE) / BLOCK_SIZE == EXPANSION_SIZE);

				// allocate memory. Up to ALIGN_BYTES bytes per chunk will be wasted
				// but this cost is generally negligible.
				pData = new byte_t[BLOCK_SIZE * EXPANSION_SIZE + ALIGN_BYTES];
				
				// align pFirst if it's not aligned already. It remains the same if 
				// it's already aligned.
				pFirst = 
					(byte_t*)(((unsigned long)pData + ALIGN_BYTES - 1) & (~(ALIGN_BYTES - 1)));
				
				// calculate pLast
				pLast = (byte_t*)(pFirst + BLOCK_SIZE * (EXPANSION_SIZE - 1));

				// Set 'next' pointers properly. The last element sould point to nothing
				// (i.e. it will be NULL).
				reset();
			}
		};
		
	private:
		//////////////////////////////////////////////////////////////////////////
		// Data
		enum {EXPANSION_SIZE = REED_DEFAULT_EXPANSION_SIZE};

		typedef std::list<Chunk> Chunks;	
		Chunks chunks_;			///< A list of chunks.

		/// 
		/// Pointer to the first free block (available for allocation).
		pointer pFree;

		/// 
		/// Pointer to the last allocated block (pTail->next == pFree).
		//pointer pTail;

		/// 
		/// To ensure proper copy semantics
		mutable const CMemoryPool* prev_;
		mutable const CMemoryPool* next_;

	public:
	//////////////////////////////////////////////////////////////////////////
	// methods
		/// Construct a CMemoryPool able to manage blocks of 'Chunk::BLOCK_SIZE' size. \n
		/// No chunks are created here in the constructor.
		CMemoryPool() throw()		
			: pFree(NULL)
		{
			prev_ = next_ = this;
		}

		/// Copy ctor. \n
		/// A copy has its own array of chunks but with the same contents. That is,
		/// both the original and the copy manage the same memory and are fully 
		/// interchangeable.
		CMemoryPool(const CMemoryPool& rhs) throw()
			: chunks_(rhs.chunks_),
			pFree(rhs.pFree)
		{
			prev_ = &rhs;
			next_ = rhs.next_;
			rhs.next_->prev_ = this;
			rhs.next_ = this;
		}

		/// 
		///	Dtor.
		~CMemoryPool() throw();

        /// 
		/// Assignment.
		CMemoryPool& operator=(const CMemoryPool&)
		{
			CMemoryPool<T> cpy(rhs);
			cpy.swap(*this);
			return *this;
		}

		/// 
		/// Swap the contents of two allocators, this and rhs.
		void swap(CMemoryPool& rhs)
		{
			std::swap(pFree, rhs.pFree);
			chunks_.swap(rhs.chunks_);
		}

		/// Allocate but don't initialize a block of type T. \n
		/// Only one block at a time can be allocated via this allocator.
		inline pointer allocate();

		/// Return pointer to the first block of the first chunk. \n
		/// Returns NULL if there are no allocated chunks.
		pointer getHead()
		{
			return (chunks_.empty()) ? NULL : 
				reinterpret_cast<pointer>(chunks_.front().pFirst);
		}

		/// 
		/// Return pFree pointer (can be NULL).
		pointer getFree()
		{
			return pFree;
		}

		/// Return pointer to the last allocated block. Cannot be NULL.\n
		/// If there are no allocated chunks - assert in debug mode, undefined 
		/// behaviour in release.
		pointer getTail()
		{
			if (chunks_.empty()) return NULL;

			Chunks::iterator i = chunks_.begin();
			if ((byte_t*)pFree == i->pFirst)
			{
                // special case: pFree points to the 1st element of the 1st chunk.
				// => nothing has been allocated so far - return NULL
				return NULL;
			}

			// pFree = tail->next, so first we should find the chunk pFree points to.
            pointer p = NULL;

			for (/*i = chunks_.begin()*/; i != chunks_.end(); ++i)
			{
				if (((byte_t*)pFree > i->pFirst) && ((byte_t*)pFree <= i->pLast))	
				{
					p = reinterpret_cast<pointer>((byte_t*)pFree - Chunk::BLOCK_SIZE);
					assert(p->next == pFree);
					break;					
				}
				else if ((byte_t*)pFree == i->pFirst)
				{
					Chunks::iterator pos = i;
					--pos;
					p = reinterpret_cast<pointer>(pos->pLast);
					assert(p->next == pFree);
					break;
				}
			}

			return p;
		}

		/// Determine if a memory block p points to is managed by this allocator.
		/// Returns TRUE (something != 0) if such block is found, FALSE (0) 
		/// otherwise. FALSE is also returned if p is not properly aligned.\n
		/// This function cannot tell if this block was allocated or it is still free.
		int findBlock(pointer p);

		/// Deallocate all the blocks at once (their dtors are not called)
		/// beginning from the element 'pFrom' points to (exclusive).
		/// Memory chunks are not deallocated though, they can be reused.\n
		/// If pFrom == NULL (default) all blocks will be cleared.
		void clear(pointer pFrom = NULL)
		{
			// pFrom must be valid
			assert ((pFrom == NULL) || (findBlock(pFrom)));

			// (NB) getHead() returns NULL if no chunks have been allocated so far,
			// and that's what we need here (set pFree to NULL);
			pFree = (pFrom == NULL) ? getHead() : pFrom->next;
		}

	private:
		///
		/// Expand memory pool (create new chunk)
		void expand();

	};	// CMemoryPool

//////////////////////////////////////////////////////////////////////////
/// Implementation
//////////////////////////////////////////////////////////////////////////

	template <class T>	
	inline void CMemoryPool<T>::Chunk::reset(pointer pNext /* = NULL */)
	{
		assert((pLast - pFirst) % BLOCK_SIZE == 0);	// alignment check
		pointer pBlock;

		// Mark up the chunk. Create 'next' pointers in the available space.
		for (byte_t* p = pFirst; p != pLast; p += BLOCK_SIZE)
		{
			pBlock = reinterpret_cast<pointer>(p);
			pBlock->next = reinterpret_cast<pointer>(p + BLOCK_SIZE);
		}

		// Now p == pLast
		pBlock = reinterpret_cast<pointer>(p);
		pBlock->next = pNext;
	}

	// Dtor.
	template <class T>
	CMemoryPool<T>::~CMemoryPool()
	{
		if (prev_ != this)
		{
			// there are other allocators managing the same memory
			prev_->next_ = next_;
			next_->prev_ = prev_;
			return;
		}

		assert(prev_ == next_);

		// Release memory
		for (Chunks::iterator i = chunks_.begin(); i != chunks_.end(); ++i)
		{
			delete [] i->pData;
		}
	}

	// Allocate
	template <class T>
	inline typename CMemoryPool<T>::pointer 
	CMemoryPool<T>::allocate()
	{
		//assert(num == 1);	// must be 1

		if (pFree == NULL)
		{
			expand();
		}
		
		pointer p = pFree;
		pFree = p->next;
		//pTail = p;

		return p;
	}

	template <class T>
	int 
	CMemoryPool<T>::findBlock(typename CMemoryPool<T>::pointer p)
	{
		for (Chunks::iterator i = chunks_.begin(); i != chunks_.end(); ++i)
		{
			if (((byte_t*)p >= i->pFirst) && ((byte_t*)p <= i->pLast))	
			{
				// alignment check
				return (((byte_t*)p - i->pFirst) % Chunk::BLOCK_SIZE == 0);	
			}
		}
		return 0;
	}

	template <class T>
	void
	CMemoryPool<T>::expand()
	{
		pointer last = (!chunks_.empty()) ? (pointer)(chunks_.back().pLast) : NULL;

		// create new memory chunk
		Chunk ch;
		ch.init();
		chunks_.push_back(ch);
		pFree = (pointer)ch.pFirst;

		// link new chunk to the previous one (if the previous one exists). 
		if (last != NULL) last->next = pFree;	
	}
} // namespace reed

#pragma warning(pop)	// restore state for each warning

#endif //__MEM_POOL_H_198300CE_37CE_476E_A5E7_6A700185B361_INCLUDED_

By viewing downloads associated with this article you agree to the Terms of Service and the article's licence.

If a file you wish to view isn't highlighted, and is a text file (not binary), please let us know and we'll add colourisation support for it.

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)

Written By

Eugene Shatokhin

Software Developer (Senior)

Russian Federation

This member has not yet provided a Biography. Assume it's interesting and varied, and probably something to do with programming.

Fast Automatic Differentiation in C++

License

Comments and Discussions