
Neural Network for Recognition of Handwritten Digits in C#

14 Mar 2012 · MIT · 9 min read
This article is an example of an artificial neural network designed to recognize handwritten digits.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ArchiveSerialization;
using System.IO;
namespace NeuralNetworkLibrary
{
    // Neural Network class
    public class NeuralNetwork : IArchiveSerialization
    {
            public double m_etaLearningRatePrevious;
            public double m_etaLearningRate;
            public uint m_cBackprops;  // counter used in connection with weight sanity check
            public NNLayerList m_Layers;
            public NeuralNetwork()
            {
                m_etaLearningRate = .001;  // arbitrary, so that brand-new NNs can be serialized with a non-ridiculous number
                m_cBackprops = 0;
                m_Layers = new NNLayerList();
            }
        
            public void Calculate(double[] inputVector, int iCount,
                                  double[] outputVector /* =null */, int oCount /* =0 */,
                                  NNNeuronOutputsList pNeuronOutputs /* =null */)
            {
                var lit = m_Layers.First();

                // first layer is the input layer: directly set the outputs of all of its
                // neurons to the input vector
                if (m_Layers.Count > 1)
                {
                    if (iCount != lit.m_Neurons.Count)
                    {
                        return;
                    }

                    int count = 0;
                    foreach (var nit in lit.m_Neurons)
                    {
                        if (count < iCount)
                        {
                            nit.output = inputVector[count];
                            count++;
                        }
                    }
                }
                // calculate the outputs of the remaining layers
                for (int i = 1; i < m_Layers.Count; i++)
                {
                    m_Layers[i].Calculate();
                }

                // load up output vector with results

                if (outputVector != null)
                {
                    lit = m_Layers[m_Layers.Count - 1];

                    for (int ii = 0; ii < oCount; ii++)
                    {
                        outputVector[ii] = lit.m_Neurons[ii].output;
                    }
                }

                // load up neuron output values with results
                if (pNeuronOutputs != null)
                {
                    // re-use is expected, so clear any previous contents and
                    // reserve capacity for one entry per layer
                    pNeuronOutputs.Clear();
                    pNeuronOutputs.Capacity = m_Layers.Count;

                    foreach (NNLayer nnlit in m_Layers)
                    {
                        var layerOut = new NNNeuronOutputs(nnlit.m_Neurons.Count);
                        for (int ii = 0; ii < nnlit.m_Neurons.Count; ++ii)
                        {
                            layerOut.Add(nnlit.m_Neurons[ii].output);
                        }
                        pNeuronOutputs.Add(layerOut);
                    }
                }
            }
            public void Backpropagate(double[] actualOutput, double[] desiredOutput, int count, NNNeuronOutputsList pMemorizedNeuronOutputs)
            {
                // backpropagates through the neural net

                if (m_Layers.Count < 2)  // there must be at least two layers in the net
                {
                    return;
                }
                if ((actualOutput == null) || (desiredOutput == null) || (count >= 256))
                {
                    return;
                }
	
	
                // check if it's time for a weight sanity check

                m_cBackprops++;

                if ((m_cBackprops % 10000) == 0)
                {
                    // every 10000 backprops
                    PeriodicWeightSanityCheck();
                }
	
	            
                // proceed from the last layer to the first, iteratively.
                // We calculate the last layer separately, and first, since it provides the needed derivative
                // (i.e., dErr_wrt_dXnm1) for the previous layers

                // nomenclature:
                //
                // Err is output error of the entire neural net
                // Xn is the output vector of the n-th layer
                // Xnm1 is the output vector of the previous layer
                // Wn is the vector of weights of the n-th layer
                // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
                // F is the squashing function: Xn = F(Yn)
                // F' is the derivative of the squashing function
                //   Conveniently, for F = tanh, F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
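                //   For instance, if a neuron's output is Xn = tanh(Yn) = 0.5, then
                //   F'(Yn) = 1 - 0.5^2 = 0.75, computed from Xn alone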

                int iSize = m_Layers.Count;
                var dErr_wrt_dXlast = new DErrorsList(m_Layers[m_Layers.Count - 1].m_Neurons.Count);
                var differentials = new List<DErrorsList>(iSize);

                int ii;

                // start the process by calculating dErr_wrt_dXn for the last layer.
                // for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 )), this differential is simply
                // the difference between the actual and the target

                for (ii = 0; ii < m_Layers[m_Layers.Count - 1].m_Neurons.Count; ++ii)
                {
                    dErr_wrt_dXlast.Add(actualOutput[ii] - desiredOutput[ii]);
                }
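                // (for example, an actual output of 0.8 against a target of 1.0 stores
                //  a differential of 0.8 - 1.0 = -0.2 for that neuron)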
	
	
                // store Xlast and reserve memory for the remaining vectors stored in differentials

                for (ii = 0; ii < iSize - 1; ii++)
                {
                    // a zero-filled differential vector for each layer except the last
                    var differential = new DErrorsList(m_Layers[ii].m_Neurons.Count);
                    for (int kk = 0; kk < m_Layers[ii].m_Neurons.Count; kk++)
                    {
                        differential.Add(0.0);
                    }
                    differentials.Add(differential);
                }
                differentials.Add(dErr_wrt_dXlast);  // last one
                // now iterate through all layers including the last but excluding the first, and ask each of
                // them to backpropagate error and adjust their weights, and to return the differential
                // dErr_wrt_dXnm1 for use as the input value of dErr_wrt_dXn for the next iterated layer

                bool bMemorized = (pMemorizedNeuronOutputs != null);
                for (int jj = iSize - 1; jj > 0; jj--)
                {
                    if (bMemorized)
                    {
                        m_Layers[jj].Backpropagate(differentials[jj], differentials[jj - 1],
                            pMemorizedNeuronOutputs[jj], pMemorizedNeuronOutputs[jj - 1], m_etaLearningRate);
                    }
                    else
                    {
                        m_Layers[jj].Backpropagate(differentials[jj], differentials[jj - 1],
                            null, null, m_etaLearningRate);
                    }
                }

                differentials.Clear();
            }
            public void EraseHessianInformation()
            {
                foreach (var lit in m_Layers)
                {
                    lit.EraseHessianInformation();
                }
            }
            public void DivideHessianInformationBy(double divisor)
            {
                // instructs each layer to divide its current diagonal Hessian info by a common divisor.
                // A check is also made to ensure that each Hessian entry is strictly non-negative

                foreach (var lit in m_Layers)
                {
                    lit.DivideHessianInformationBy(divisor);
                }
            }
            public void BackpropagateSecondDervatives(double[] actualOutputVector, double[] targetOutputVector, uint count)
            {
                // calculates the second derivatives (for the diagonal Hessian) and backpropagates
                // them through the neural net

                if (m_Layers.Count < 2)  // there must be at least two layers in the net
                {
                    return;
                }
                if ((actualOutputVector == null) || (targetOutputVector == null) || (count >= 256))
                {
                    return;
                }
               
                // we use nearly the same nomenclature as above (e.g., "dErr_wrt_dXnm1") even though everything here
                // is actually second derivatives and not first derivatives, since otherwise the ASCII would
                // become too confusing.  To emphasize that these are second derivatives, we insert a "2"
                // such as "d2Err_wrt_dXnm1".  We don't insert the second "2" that's conventional for designating
                // second derivatives
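                // (the diagonal Hessian terms accumulated here are later combined with
                //  DivideHessianInformationBy() to scale each weight's effective learning rate,
                //  in the style of stochastic diagonal Levenberg-Marquardt; that scaling itself
                //  happens in the NNLayer/NNWeight code, which is not shown in this file)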

                int iSize = m_Layers.Count;
                int neuronCount = m_Layers[m_Layers.Count - 1].m_Neurons.Count;
                var d2Err_wrt_dXlast = new DErrorsList(neuronCount);
                var differentials = new List<DErrorsList>(iSize);

                // start the process by calculating the second derivative d2Err_wrt_dXn for the last layer.
                // for the standard MSE Err function (i.e., 0.5*sumof( (actual-target)^2 )), this differential is
                // exactly one

                var lit = m_Layers.Last();  // point to the last layer

                for (int ii = 0; ii < lit.m_Neurons.Count; ii++)
                {
                    d2Err_wrt_dXlast.Add(1.0);
                }
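                // derivation: with Err = 0.5*(actual - target)^2 per output neuron,
                // dErr/dX = (actual - target), so d2Err/dX^2 = 1 regardless of the output value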
	
                // store Xlast and reserve memory for the remaining vectors stored in differentials

                for (int ii = 0; ii < iSize - 1; ii++)
                {
                    // a zero-filled differential vector for each layer except the last
                    var differential = new DErrorsList(m_Layers[ii].m_Neurons.Count);
                    for (int kk = 0; kk < m_Layers[ii].m_Neurons.Count; kk++)
                    {
                        differential.Add(0.0);
                    }
                    differentials.Add(differential);
                }

                differentials.Add(d2Err_wrt_dXlast);  // last one
	
                // now iterate through all layers including the last but excluding the first, starting from
                // the last, and ask each of them to backpropagate the second derivative and accumulate
                // the diagonal Hessian, and also to return the second derivative
                // d2Err_wrt_dXnm1 for use as the input value of d2Err_wrt_dXn for the next iterated layer (which
                // is the previous layer spatially)

                for (int ii = iSize - 1; ii > 0; ii--)
                {
                    m_Layers[ii].BackpropagateSecondDerivatives(differentials[ii], differentials[ii - 1]);
                }

                differentials.Clear();
            }
            void PeriodicWeightSanityCheck()
            {
                // function that simply goes through all weights and tests them against an arbitrary
                // "reasonable" upper limit.  If the upper limit is exceeded, a warning is displayed

                foreach (var lit in m_Layers)
                {
                    lit.PeriodicWeightSanityCheck();
                }
            }
            public virtual void Serialize(Archive ar)
            {
                if (ar.IsStoring())
                {
                    // storing: write the learning rate, the layer count, then each layer in order
                    ar.Write(m_etaLearningRate);
                    ar.Write(m_Layers.Count);
                    foreach (var lit in m_Layers)
                    {
                        lit.Serialize(ar);
                    }
                }
                else
                {
                    // loading: read the same fields back in the same order

                    double eta;
                    ar.Read(out eta);
                    m_etaLearningRate = eta;  // two-step storage is needed since m_etaLearningRate is "volatile"

                    int nLayers;
                    var pLayer = (NNLayer)null;

                    ar.Read(out nLayers);
                    m_Layers.Clear();
                    m_Layers = new NNLayerList(nLayers);
                    for (int ii = 0; ii < nLayers; ii++)
                    {
                        // each layer is constructed with the previously read layer as its parent,
                        // then deserializes its own neurons, weights, and connections
                        pLayer = new NNLayer("", pLayer);

                        m_Layers.Add(pLayer);
                        pLayer.Serialize(ar);
                    }
                }
            }
    }
}
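
For orientation, here is a minimal usage sketch. It is not taken from the article's demo code: it assumes the layers, neurons, and weights have already been built up elsewhere (that construction lives in NNLayer and the demo project, not in this file), and it assumes the demo's 29x29 = 841-pixel input pattern, ten output neurons (one per digit), and a tanh-style -1/+1 target encoding.

// minimal sketch; the layer-building step, the input size, and the target
// encoding are assumptions about the surrounding demo project, not shown above
var nn = new NeuralNetwork();
// ... populate nn.m_Layers with NNLayer instances, their neurons, and weights ...

var input = new double[841];    // 29x29 grayscale pattern, e.g. a padded MNIST digit
var output = new double[10];    // one output per digit class 0-9

// forward pass: fills 'output' from the last layer's neuron outputs
nn.Calculate(input, input.Length, output, output.Length, null);

// one training step: build a -1/+1 target vector and backpropagate the error
var target = new double[10];
for (int i = 0; i < target.Length; i++) target[i] = -1.0;  // tanh-style "off"
target[3] = 1.0;  // hypothetical label: the desired digit is "3"
nn.Backpropagate(output, target, output.Length, null);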
