
Multiple convolution neural networks approach for online handwriting recognition

9 Apr 2013 · CPOL · 8 min read
The research presents a word recognition technique for an online handwriting recognition system that uses multiple component neural networks (MCNN) as exchangeable parts of the classifier.
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using ANN.Perceptron.ArchiveSerialization;
using ANN.Perceptron.Layers;
using ANN.Perceptron.Neurons;
using System.IO;
using System.Threading.Tasks;
namespace ANN.Perceptron.Network
{
    // Neural Network class
    public class BackPropagation : ForwardPropagation
    {
        #region Properties
        public uint Backprops;  // counter used in connection with Weight sanity check
        #endregion
        public BackPropagation():base()
            {
                EtaLearningRate = .001;  // arbitrary, so that brand-new NNs can be serialized with a non-ridiculous number
                Backprops = 0;
                network = null;
            }
            public BackPropagation(ConvolutionNetwork net):base(net)
            {
                EtaLearningRate = .001;  // arbitrary, so that brand-new NNs can be serialized with a non-ridiculous number
                Backprops = 0;
                network = net;
            }
       
            public void Backpropagate(double[] actualOutput, double[] desiredOutput, int count, NeuronOutputs[] pMemorizedNeuronOutputs)
            { 
                        // backpropagates through the neural net
	
	            if (network.LayerCount < 2) // there must be at least two layers in the net
                {
                    return;           
                }
	            if ( ( actualOutput == null ) || ( desiredOutput == null ) || ( count >= 256 ) )
		            return;
	
	
	            // check if it's time for a weight sanity check
	            if ( (Backprops % 10000) == 0 )
	            {
		            // every 10000 backprops
		
		            PeriodicWeightSanityCheck();
	            }
	
	            
	            // proceed from the last layer to the first, iteratively
	            // We calculate the last layer separately, and first, since it provides the needed derivative
	            // (i.e., dErr_wrt_dXnm1) for the previous layers
	
	            // nomenclature:
	            //
	            // Err is output error of the entire neural net
	            // Xn is the output vector on the n-th layer
	            // Xnm1 is the output vector of the previous layer
	            // Wn is the vector of weights of the n-th layer
	            // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
	            // F is the squashing function: Xn = F(Yn)
	            // F' is the derivative of the squashing function
	            //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
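	            //   For illustration: if a neuron's output is Xn = tanh(Yn) = 0.5, then
	            //   F'(Yn) = 1 - Xn^2 = 1 - 0.25 = 0.75, computed directly from the stored output Xn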

               int iSize = network.LayerCount;
               var dErr_wrt_dXlast = new double[network.Layers[network.LayerCount - 1].NeuronCount];
               var differentials = new List<double[]>(iSize);
	
	            // start the process by calculating dErr_wrt_dXn for the last layer.
	            // for the standard MSE Err function (i.e., 0.5*sumof((actual-target)^2)), this differential is simply
	            // the difference between the actual output and the target

               // Note: Parallel.For evaluates the last layer's NeuronCount once as its upper bound,
               // whereas the original for-loop re-evaluated that expression on every iteration.
               Parallel.For(0, network.Layers[network.LayerCount - 1].NeuronCount, ii =>
               {
                   dErr_wrt_dXlast[ii] = actualOutput[ii] - desiredOutput[ii];
               });
	
	
	            // store Xlast and reserve memory for the remaining vectors stored in differentials
	
	            
	           
	            for ( int ii=0; ii<iSize-1; ii++ )
	            {
                    var m_differential = new double[network.Layers[ii].NeuronCount];
                    // Note: Parallel.For evaluates network.Layers[ii].NeuronCount once as its upper bound,
                    // whereas the original for-loop re-evaluated that expression on every iteration.
                    Parallel.For(0, network.Layers[ii].NeuronCount, kk =>
                    {
                        m_differential[kk] = 0.0;
                    });
                    differentials.Add(m_differential);
                }
	            differentials.Add(dErr_wrt_dXlast);  // last one
	            // now iterate through all layers including the last but excluding the first, and ask each of
	            // them to backpropagate error and adjust their weights, and to return the differential
	            // dErr_wrt_dXnm1 for use as the input value of dErr_wrt_dXn for the next iterated layer
	
	            bool bMemorized = ( pMemorizedNeuronOutputs != null );
	            for ( int jj=iSize-1; jj>0;jj--)
	            {
		            if (bMemorized)
		            {
			           network.Layers[jj].Backpropagate( differentials[ jj ], differentials[ jj - 1 ], 
				            pMemorizedNeuronOutputs[jj], pMemorizedNeuronOutputs[ jj - 1 ], EtaLearningRate );
		            }
		            else
		            {
                        network.Layers[jj].Backpropagate(differentials[jj], differentials[jj - 1], 
				            null, null, EtaLearningRate );
		            }
	            }

                differentials.Clear();
            }
            public void EraseHessianInformation()
            {
                foreach (var lit in network.Layers)
                {
                    lit.EraseHessianInformation();
                }
            }
            public void DivideHessianInformationBy(double divisor)
            {

                // controls each layer to divide its current diagonal Hessian info by a common divisor. 
                // A check is also made to ensure that each Hessian is strictly zero-positive

                foreach (var lit in network.Layers)
                {
                    lit.DivideHessianInformationBy(divisor);
                }
	
            }
            public void BackpropagateSecondDervatives(double[] actualOutputVector, double[] targetOutputVector, int outputCount)
            { 
                // calculates the second derivatives (for the diagonal Hessian) and backpropagates
	            // them through the neural net
	
		
	            if (network.LayerCount < 2) { return; }  // there must be at least two layers in the net

                if ((actualOutputVector == null) || (targetOutputVector == null) || (outputCount >= 256))
                {
                    return;
                }
               
	            // we use nearly the same nomenclature as above (e.g., "dErr_wrt_dXnm1") even though everything here
	            // is actually second derivatives and not first derivatives, since otherwise the ASCII would 
	            // become too confusing.  To emphasize that these are second derivatives, we insert a "2"
	            // such as "d2Err_wrt_dXnm1".  We don't insert the second "2" that's conventional for designating
	            // second derivatives.

                int iSize = network.LayerCount;
                int neuronCount = network.Layers[network.LayerCount - 1].NeuronCount;
                var d2Err_wrt_dXlast = new double[neuronCount];
                var differentials = new List<double[]>(iSize);
	           
	
	            // start the process by calculating the second derivative d2Err_wrt_dXn for the last layer.
	            // for the standard MSE Err function (i.e., 0.5*sumof((actual-target)^2)), this differential is 
	            // exactly one
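	            // (With Err = 0.5*sumof((Xn - Tn)^2), dErr_wrt_dXn = Xn - Tn, and differentiating once
	            // more with respect to Xn gives d2Err_wrt_dXn = 1 for every output neuron.)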

                var lit = network.Layers.Last();  // point to last layer

                // Note: Parallel.For evaluates lit.NeuronCount once as its upper bound,
                // whereas the original for-loop re-evaluated that expression on every iteration.
                Parallel.For(0, lit.NeuronCount, ParallelOption, ii =>
                {
                    d2Err_wrt_dXlast[ii] = 1.0;
                });	
	
	            // store Xlast and reserve memory for the remaining vectors stored in differentials
	
	
	            for ( int ii=0; ii<iSize-1; ii++ )
	            {
                    var m_differential = new double[network.Layers[ii].NeuronCount];
                    // Note: Parallel.For evaluates network.Layers[ii].NeuronCount once as its upper bound,
                    // whereas the original for-loop re-evaluated that expression on every iteration.
                    Parallel.For(0, network.Layers[ii].NeuronCount, kk =>
                    {
                        m_differential[kk] = 0.0;
                    });
                    differentials.Add(m_differential);
                   
	            }
	
	            differentials.Add(d2Err_wrt_dXlast);  // last one
	
	            // now iterate through all layers including the last but excluding the first, starting from
	            // the last, and ask each of
	            // them to backpropagate the second derivative and accumulate the diagonal Hessian, and also to
	            // return the second derivative
	            // d2Err_wrt_dXnm1 for use as the input value of d2Err_wrt_dXn for the next iterated layer (which
	            // is the previous layer spatially)
	         
	            for ( int ii = iSize - 1; ii>0; ii--)
	            {
		            network.Layers[ii].BackpropagateSecondDerivatives( differentials[ ii ], differentials[ ii - 1 ] );
		        }
	
	            differentials.Clear();
            }
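            // A typical calling sequence for the Hessian-related methods above (a sketch only, not
            // mandated by this class): call EraseHessianInformation() once, call
            // BackpropagateSecondDervatives() for each pattern in a small sample (after running a
            // forward pass for that pattern), then call DivideHessianInformationBy(sampleCount) so
            // the accumulated diagonal Hessian becomes an average over the sample.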
            protected void PeriodicWeightSanityCheck()
            {
                // function that simply goes through all weights and tests them against an arbitrary
                // "reasonable" upper limit.  If the upper limit is exceeded, a warning is displayed

                foreach (var lit in network.Layers)
                {
                    lit.PeriodicWeightSanityCheck();
                }
            }
            override public void Serialize(Archive ar)
            { 
                if (ar.IsStoring())
	            {
                    
		            // store the learning rate, the layer count, and then each layer in order
                    ar.Write(EtaLearningRate);
                    ar.Write(network.LayerCount);
                    foreach (var lit in network.Layers)
		            {
			            lit.Serialize( ar );
		            }
		 
	            }
	            else
	            {
		            // load the learning rate, the layer count, and then each layer in order
		
		            double eta; 
		            ar.Read(out eta);
		            EtaLearningRate = eta;  // two-step read is needed since EtaLearningRate is "volatile"
		
		            int nLayers;
                    CommonLayer pLayer = null;

                    ar.Read(out nLayers);
                  
                    network.Layers = new CommonLayer[nLayers];
		            for ( int ii=0; ii<nLayers; ii++ )
		            {
			            pLayer = new CommonLayer( "", pLayer );
			
			            network.Layers[ii]=pLayer;
			            pLayer.Serialize( ar );
		            }
		
	            }
            }
    }
}
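
For reference, the sketch below shows one way the public methods above might be driven from a training loop. It is illustrative only: it assumes the forward pass (provided by the ForwardPropagation base class, not shown here) has already filled the actual-output vectors, and the TrainingSketch class and its method names are hypothetical, not part of the article's source.

using ANN.Perceptron.Network;

static class TrainingSketch
{
    // Hypothetical single-pattern training step: a forward pass is assumed to have
    // produced actualOutputs for the current input before this is called.
    public static void TrainOnePattern(BackPropagation trainer,
        double[] actualOutputs, double[] targetOutputs, int outputCount)
    {
        trainer.Backpropagate(actualOutputs, targetOutputs, outputCount, null);
    }

    // Hypothetical periodic refresh of the diagonal Hessian over a small sample of
    // patterns, averaged at the end; each sampleActuals[i] is assumed to come from
    // a forward pass on the corresponding sample.
    public static void RefreshDiagonalHessian(BackPropagation trainer,
        double[][] sampleActuals, double[][] sampleTargets, int outputCount)
    {
        trainer.EraseHessianInformation();
        for (int i = 0; i < sampleActuals.Length; i++)
        {
            trainer.BackpropagateSecondDervatives(sampleActuals[i], sampleTargets[i], outputCount);
        }
        trainer.DivideHessianInformationBy(sampleActuals.Length);
    }
}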
