
Multiple convolution neural networks approach for online handwriting recognition

This research presents a word recognition technique for an online handwriting recognition system that uses multiple component neural networks (MCNN) as exchangeable parts of the classifier.
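The listing below is the CommonLayer class from the ANN.Perceptron.Layers namespace, the base class shared by the network's layers. It implements the forward pass (Calculate), backpropagation of the error gradient together with the weight update (Backpropagate), accumulation of second derivatives into each weight's diagonal Hessian (BackpropagateSecondDerivatives), a periodic sanity check on weight magnitudes, and serialization of the layer's neurons, connections, and weights.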
using System;
using ANN.Perceptron.ArchiveSerialization;
using System.Drawing;
using System.Threading;
using System.Threading.Tasks;
using ANN.Perceptron.Common;
using ANN.Perceptron.Connections;
using ANN.Perceptron.Neurons;
using ANN.Perceptron.Weights;
namespace ANN.Perceptron.Layers
{
    // Common base class for the network's layers
    public class CommonLayer : NetworkProvider
    {
     
        protected string label;
        protected CommonLayer prevLayer;
        public CommonLayer PrevLayer
        {
            get
            {
                return prevLayer;
            }
            set
            {
                if (prevLayer == value)
                    return;
                prevLayer = value;
            }
        }
        protected int neuronCount;
        protected Weight[] weights;
        protected int weightCount;
    
        protected ActivationFunction.SigmoidFunction sigmoid;
        protected bool floatingPointWarning;  // flag for one-time warning (per layer) about potential floating point overflow
        protected Size featureMapSize;
        protected int nFeatureMaps;
        protected LayerTypes type;
        protected Neuron[] neurons;
        public LayerTypes LayerType
        {
            get
            {
                return type;
            }
            set
            {
                if (type == value)
                    return;
                type = value;
            }
        }
        public string Label
        {
            get
            {
                return label;
            }
            set
            {
                if (label == value)
                    return;
                label = value;
            }
        }
        public int FeatureMapCount
        {
            get
            {
                return nFeatureMaps;
            }
            set
            {
                if (nFeatureMaps == value)
                    return;
                nFeatureMaps = value;
            }
        }
        public int WeightCount
        {
            get
            {
                return weightCount;
            }
            set
            {
                if (weightCount == value)
                    return;
                weightCount = value;
            }
        }
        public int NeuronCount
        {
            get
            {
                return neuronCount;
            }
            set
            {
                if (neuronCount == value)
                    return;
                neuronCount = value;
            }
        }
        public Weight[] Weights
        {
            get
            {
                return weights;
            }
            set
            {
                if (weights == value)
                    return;
                weights = value;
            }
        }
        public Neuron[] Neurons
        {
            get
            {
                return neurons;
            }
            set
            {
                if (neurons == value)
                    return;
                neurons = value;
            }
        }
        public Size FeatureMapSize
        {
            get
            {
                return featureMapSize;
            }
            set
            {
                if (featureMapSize == value)
                    return;
                featureMapSize = value;
            }
        }
        public CommonLayer():base()
        {
            label = "";
            prevLayer = null;
            sigmoid = new ActivationFunction.SigmoidFunction();
            Weights = null;
            Neurons = null;
            weightCount = 0;
            neuronCount = 0;
        }
        public CommonLayer(string sLabel, CommonLayer prelayer)
            : this()
        {
            label = sLabel;
            prevLayer = prelayer;
        }
        public virtual void Initialize()
        {
            floatingPointWarning = false;
            CreateLayer();
        }
        // Sigmoid activation function: a scaled tanh squashing function
        public double SIGMOID(double x)
        {
            return (1.7159 * System.Math.Tanh(0.66666667 * x));
        }
        /// <summary>
        /// Derivative of the sigmoid, expressed as a function of the sigmoid's output.
        /// </summary>
        /// <param name="S">The sigmoid's output value.</param>
        /// <returns>The derivative of the sigmoid at the input that produced S.</returns>
        public double DSIGMOID(double S)
        {
            return (0.66666667 / 1.7159 * (1.7159 + (S)) * (1.7159 - (S)));
        }
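        // Worked note on the derivation of DSIGMOID from SIGMOID (not part of the original source):
        // with S = SIGMOID(x) = A * tanh(B * x), where A = 1.7159 and B = 0.66666667,
        //   dS/dx = A * B * (1 - tanh^2(B * x))
        //         = A * B * (1 - (S / A)^2)
        //         = (B / A) * (A + S) * (A - S)
        // which is exactly the expression computed by DSIGMOID(S) above.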
        public void Calculate()
        {
            // Forward pass: for each neuron, accumulate the weighted sum of the previous
            // layer's outputs and squash it.  The first connection of every neuron carries
            // the bias weight, which is taken as-is (an implied input of 1.0).
            if (prevLayer != null)
            {
                Parallel.ForEach(Neurons, ParallelOption, nit =>
                {
                    double dSum = 0;
                    foreach (var cit in nit.Connections)
                    {
                        if (cit == nit.Connections[0])
                        {
                            dSum = (weights[(int)cit.WeightIndex].value);
                        }
                        else
                        {
                            dSum += (weights[(int)cit.WeightIndex].value) * (prevLayer.Neurons[(int)cit.NeuronIndex].output);
                        }
                    }
                    nit.output = SIGMOID(dSum);
                });
            }
        }
        /////////////
        public void Backpropagate(double[] dErr_wrt_dXn /* in */,
                            double[] dErr_wrt_dXnm1 /* out */,
                            NeuronOutputs thisLayerOutput,  // memorized values of this layer's output
                            NeuronOutputs prevLayerOutput,  // memorized values of previous layer's output
                            double etaLearningRate)
        {
            // nomenclature (repeated from NeuralNetwork class):
            //
            // Err is output error of the entire neural net
            // Xn is the output vector on the n-th layer
            // Xnm1 is the output vector of the previous layer
            // Wn is the vector of weights of the n-th layer
            // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
            // F is the squashing function: Xn = F(Yn)
            // F' is the derivative of the squashing function
            //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
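            //
            // In these terms, the steps below compute, for the current training pattern:
            //   dErr_wrt_dYn   = F'(Yn) * dErr_wrt_dXn
            //   dErr_wrt_dWn   = Xnm1   * dErr_wrt_dYn     (gradient of the error wrt each weight)
            //   dErr_wrt_dXnm1 = Wn     * dErr_wrt_dYn     (handed back to the previous layer)
            //   Wn            -= etaLearningRate / (diagHessian + 0.10) * dErr_wrt_dWn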
            try
            {

                double[] dErr_wrt_dYn = new double[neuronCount];

                ///// DESIGN TRADEOFF: REVIEW !!
                // dErr_wrt_dWn holds the differential of the current pattern's error with respect
                // to the weights in this layer.  In the original C++ implementation an std::vector
                // was avoided here because its allocator caused excessive page faults for layers
                // with many weights (such as fully-connected layers); a heap-allocated C array did
                // not help, so _alloca was used to place the array on the stack, at the risk of
                // stack overflow.  In this C# port a plain managed array is used instead.
                double[] dErr_wrt_dWn = new double[weightCount];
                // (a new double[] is already zero-initialized; this parallel loop just makes the
                // original "important to initialize to zero" intent explicit)
                Parallel.For(0, weightCount, ParallelOption, ii =>
                {
                    dErr_wrt_dWn[ii] = 0.0;
                });
                //ParallelOption.MaxDegreeOfParallelism = 1;
                bool bMemorized = (thisLayerOutput != null) && (prevLayerOutput != null);
                // calculate dErr_wrt_dYn = F'(Yn) * dErr_wrt_Xn
                Parallel.For(0, neuronCount,ParallelOption, ii =>
                {
                    double output;
                    if (bMemorized != false)
                    {
                        output = thisLayerOutput[ii];
                    }
                    else
                    {
                        output = Neurons[ii].output;
                    }
                    dErr_wrt_dYn[ii] = (DSIGMOID(output) * dErr_wrt_dXn[ii]);
                });

                // calculate dErr_wrt_Wn = Xnm1 * dErr_wrt_Yn
                // For each neuron in this layer, go through the list of connections from the prior layer, and
                // update the differential for the corresponding weight
                
                
                Parallel.For(0, neuronCount,ParallelOption, index =>
                {
                    var nit = neurons[index];
                    foreach (Connection cit in nit.Connections)
                    {
                        double output;
                        uint kk = cit.NeuronIndex;
                        if (kk == 0xffffffff)
                        {
                            output = 1.0; // this is the bias weight
                        }
                        else
                        {
                            if (bMemorized != false)
                            {
                                output = prevLayerOutput[(int)kk];
                            }
                            else
                            {
                                output = prevLayer.Neurons[(int)kk].output;
                            }
                        }
                        // note: the gradient is assigned rather than accumulated here, which assumes
                        // each weight index appears in at most one connection of this layer
                        dErr_wrt_dWn[cit.WeightIndex] = dErr_wrt_dYn[index] * output;
                    }
                });
                // calculate dErr_wrt_Xnm1 = Wn * dErr_wrt_dYn, which is needed as the input value of
                // dErr_wrt_Xn for backpropagation of the next (i.e., previous) layer
                // For each neuron in this layer

                Parallel.For(0, neuronCount, ParallelOption, index =>
                {
                    var nit = neurons[index];
                    foreach (Connection cit in nit.Connections)
                    {
                        uint kk = cit.NeuronIndex;
                        if (kk != 0xffffffff)
                        {
                            // we exclude ULONG_MAX, which signifies the phantom bias neuron with
                            // constant output of "1", since we cannot train the bias neuron
                            // note: several neurons may accumulate into the same previous-layer entry,
                            // and this parallel "+=" is not synchronized in this port
                            int nIndex = (int)kk;
                            dErr_wrt_dXnm1[nIndex] += dErr_wrt_dYn[index] * Weights[(int)cit.WeightIndex].value;
                        }
                    }
                });
                // finally, update the weights of this layer using dErr_wrt_dWn and the learning rate eta.
                // The original C++ implementation used an atomic compare-and-exchange here, since another
                // thread might be in the process of backpropagation and the weights might have shifted
                // slightly; this port performs a plain update.
                Parallel.For(0, weightCount,ParallelOption, j =>
                {
                    //const double dMicron = 0.10;
                    weights[j].value -= (etaLearningRate / (weights[j].diagHessian + 0.10)) * dErr_wrt_dWn[j];
                });

            }
            catch (Exception)
            {
                // abandon this backpropagation pass if anything goes wrong
                return;
            }


        }


        public void PeriodicWeightSanityCheck()
        {
            // called periodically by the neural net, to request a check on the "reasonableness" of the 
            // weights.  The warning message is given only once per layer

            if (weights != null)
            {

                Parallel.ForEach(weights,ParallelOption, wit =>
                {
                    double val = System.Math.Abs(wit.value);
                    if ((val > 100.0) && (floatingPointWarning == false))
                    {
                        // 100.0 is an arbitrary value, that no reasonable weight should ever exceed
                        /*
                                        string strMess = ""; ;
                                        strMess.Format("Caution: Weights are becoming unboundedly large \n"+
                                            "Layer: %s \nWeight: %s \nWeight value = %g \nWeight Hessian = %g\n\n"+
                                             "Suggest abandoning this backpropagation and investigating",
                                            label, wit.label, wit.value, wit.diagHessian );
                                        //show message box
                                        //MessageBox.show( NULL, strMess, _T( "Problem With Weights" ), MB_ICONEXCLAMATION | MB_OK );
                                        */
                        floatingPointWarning = true;
                    }
                });
            }
        }
        public void EraseHessianInformation()
        {
            // goes through all the weights associated with this layer, and sets each of their
            // diagHessian value to zero
            if (Weights != null)
            {

                Parallel.ForEach(Weights, ParallelOption,wit =>
                {
                    wit.diagHessian = 0.0;
                });
            }

        }
        public void DivideHessianInformationBy(double divisor)
        {
            // goes through all the weights associated with this layer, and divides each of their
            // diagHessian value by the indicated divisor
            if (Weights != null)
            {

                Parallel.ForEach(Weights,ParallelOption, wit =>
                {
                    double dTemp;
                    dTemp = wit.diagHessian;
                    if (dTemp < 0.0)
                    {
                        // it should not be possible to reach here, since all calculations for the second
                        // derivative are strictly zero-positive.  However, there are some early indications 
                        // that this check is necessary anyway
                        dTemp = 0.0;
                    }
                    wit.diagHessian = dTemp / divisor;
                });
            }
        }
        public void BackpropagateSecondDerivatives(double[] d2Err_wrt_dXn /* in */,
                                                    double[] d2Err_wrt_dXnm1 /* out */)
        {
            // nomenclature (repeated from NeuralNetwork class)
            // NOTE: even though we are addressing SECOND derivatives ( and not first derivatives),
            // we use nearly the same notation as if there were first derivatives, since otherwise the
            // ASCII look would be confusing.  We add one "2" but not two "2's", such as "d2Err_wrt_dXn",
            // to give a gentle emphasis that we are using second derivatives
            //
            // Err is output error of the entire neural net
            // Xn is the output vector on the n-th layer
            // Xnm1 is the output vector of the previous layer
            // Wn is the vector of weights of the n-th layer
            // Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
            // F is the squashing function: Xn = F(Yn)
            // F' is the derivative of the squashing function
            //   Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input 
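            //
            // In these terms, the steps below accumulate a diagonal estimate of the Hessian:
            //   d2Err_wrt_dYn   = ( F'(Yn) )^2 * d2Err_wrt_dXn
            //   d2Err_wrt_dWn   = ( Xnm1 )^2   * d2Err_wrt_dYn
            //   d2Err_wrt_dXnm1 = ( Wn )^2     * d2Err_wrt_dYn
            // and finally add d2Err_wrt_dWn into each weight's diagHessian, which is the quantity that
            // the weight update in Backpropagate() divides by (a scheme often described as stochastic
            // diagonal Levenberg-Marquardt).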

          

            var d2Err_wrt_dYn = new double[neuronCount];
            ///// DESIGN TRADEOFF: REVIEW !!
            // d2Err_wrt_dWn holds the second differential of the current pattern's error with respect
            // to the weights in this layer.  The reasoning here is identical to that in Backpropagate(),
            // from which this function is derived: the original C++ implementation avoided std::vector
            // (its allocator caused excessive page faults for layers with many weights) and a
            // heap-allocated C array (no improvement), and used _alloca to place the array on the
            // stack, at the risk of stack overflow.  In this C# port a plain managed array is used instead.
            double[] d2Err_wrt_dWn = new double[weightCount];
            // (a new double[] is already zero-initialized; this parallel loop just makes that explicit)
            Parallel.For(0, weightCount, ParallelOption, ii =>
            {
                d2Err_wrt_dWn[ii] = 0.0;
            });
            // calculate d2Err_wrt_dYn = ( F'(Yn) )^2 * dErr_wrt_Xn (where dErr_wrt_Xn is actually a second derivative )

            Parallel.For(0, neuronCount,ParallelOption, ii =>
            {
                double output;
                double dTemp;
                output = Neurons[ii].output;
                dTemp = ActivationFunction.SigmoidFunction.DSIGMOID(output);
                d2Err_wrt_dYn[ii] = (d2Err_wrt_dXn[ii] * dTemp * dTemp);
            });
            // calculate d2Err_wrt_Wn = ( Xnm1 )^2 * d2Err_wrt_Yn (where dE2rr_wrt_Yn is actually a second derivative)
            // For each neuron in this layer, go through the list of connections from the prior layer, and
            // update the differential for the corresponding weight

            Parallel.For(0, neuronCount, ParallelOption, i =>
            {
                var nit = neurons[i];
                foreach (var cit in nit.Connections)
                {
                    try
                    {
                        double output;
                        uint kk = (uint)cit.NeuronIndex;
                        if (kk == 0xffffffff)
                        {
                            output = 1.0; // this is the bias connection; implied neuron output of "1"
                        }
                        else
                        {
                            output = prevLayer.Neurons[(int)kk].output;
                        }
                        // note: as in Backpropagate(), the value is assigned rather than accumulated
                        // (the original C++ code used "+="), which assumes each weight index appears
                        // in at most one connection of this layer
                        d2Err_wrt_dWn[cit.WeightIndex] = d2Err_wrt_dYn[i] * output * output;
                    }
                    catch (Exception)
                    {
                        // ignore and continue with the next connection
                    }
                }
            });
            // calculate d2Err_wrt_Xnm1 = ( Wn )^2 * d2Err_wrt_dYn (where d2Err_wrt_dYn is a second derivative not a first).
            // d2Err_wrt_Xnm1 is needed as the input value of
            // d2Err_wrt_Xn for backpropagation of second derivatives for the next (i.e., previous spatially) layer
            // For each neuron in this layer


            Parallel.For(0, neuronCount,ParallelOption, i =>
            {
                var nit = neurons[i];
                foreach (var cit in nit.Connections)
                {
                    try
                    {
                        uint kk = cit.NeuronIndex;
                        if (kk != 0xffffffff)
                        {
                            // we exclude ULONG_MAX, which signifies the phantom bias neuron with
                            // constant output of "1", since we cannot train the bias neuron
                            int nIndex;
                            nIndex = (int)kk;
                            double dTemp;
                            dTemp = Weights[(int)cit.WeightIndex].value;
                            dTemp *= dTemp;
                            d2Err_wrt_dXnm1[nIndex] += d2Err_wrt_dYn[i] * dTemp;
                        }
                    }
                    catch (Exception)
                    {
                        return;
                    }
                }
            });
            

            // finally, update the diagonal Hessians for the weights of this layer using d2Err_wrt_dWn.
            // By design, this function (and its iteration over many (approx. 500) patterns) is called while a 
            // single thread has locked the neural network, so there is no possibility that another
            // thread might change the value of the Hessian.  The original C++ implementation nevertheless
            // used an atomic compare-and-exchange operation here, since another thread might be in the
            // process of backpropagation of second derivatives and the Hessians might have shifted slightly;
            // this port performs a plain read-modify-write.

            Parallel.For(0, weightCount, ParallelOption, jj =>
            {
                double oldValue, newValue;
                oldValue = Weights[jj].diagHessian;
                newValue = oldValue + d2Err_wrt_dWn[jj];
                Weights[jj].diagHessian = newValue;
            });
        }
      
        override public void Serialize(Archive ar)
        {
            int ii, jj;
	
	        if (ar.IsStoring())
	        {
                
                // write layer's label    
                ar.Write(label);
                //write layer type
                ar.Write((int)type);
                //write neurons count
                ar.Write(neuronCount);
                //write weights count
                ar.Write(weightCount);
                //write feature maps count
                ar.Write(nFeatureMaps);
                //write size of feature map
                ar.Write(featureMapSize.Width);
                ar.Write(featureMapSize.Height);
                foreach (Neuron nit in Neurons)
                {
                    ar.Write(nit.label);
                    ar.Write(nit.ConnectionCount);
                    if (nit.Connections != null&&nit.ConnectionCount>0)
                    {
                        foreach (Connection cit in nit.Connections)
                        {
                            ar.Write(cit.NeuronIndex);
                            ar.Write(cit.WeightIndex);
                        }
                    }
                }
                if (weights != null&&WeightCount>0)
                {
                    foreach (Weight wit in Weights)
                    {
                        ar.Write(wit.label);
                        ar.Write(wit.value);
                    }
                }
		    }
	        else
	        {
		        
		        string str;
                //Read layer's label
                ar.Read(out str);
		        label = str;
                //Read layer type
                int iType;
                ar.Read(out iType);
                type = (LayerTypes)iType;
		        int iNumNeurons, iNumWeights, iNumConnections;
		        double value;
		
		        Neuron pNeuron;
		        Weight pWeight;
                
		        //Read No of Neuron, Weight
		        ar.Read(out iNumNeurons);
		        ar.Read(out iNumWeights);
                neuronCount = iNumNeurons;
                weightCount = iNumWeights;
                int iFeatureMaps, iWidth, iHeight;
                //read feature maps count
                ar.Read(out iFeatureMaps);
                nFeatureMaps = iFeatureMaps;
                //read size of feature map
                ar.Read(out iWidth);
                ar.Read(out iHeight);
                featureMapSize.Width = iWidth;
                featureMapSize.Height = iHeight;
                //read connections and weights
                if (iNumNeurons != 0 )
                {
                    //clear neuron list and weight list.
                    
                    Neurons = new Neuron[iNumNeurons];
                    Weights = new Weight[iNumWeights];

                    for (ii = 0; ii < iNumNeurons; ii++)
                    {
                        //ar.Read Neuron's label
                        ar.Read(out str);
                        //Read Neuron's Connection number
                        ar.Read(out iNumConnections);
                        pNeuron = new Neuron(str, iNumConnections);
                        pNeuron.ConnectionCount = iNumConnections;
                        pNeuron.label = str;
                        Neurons[ii]=pNeuron;
                        for (jj = 0; jj < iNumConnections; jj++)
                        {
                            var conn = new Connection();
                            ar.Read(out conn.NeuronIndex);
                            ar.Read(out conn.WeightIndex);
                            pNeuron.AddConnection(conn,jj);
                        }
                    }

                    for (jj = 0; jj < iNumWeights; jj++)
                    {
                        ar.Read(out str);
                        ar.Read(out value);

                        pWeight = new Weight(str, value);
                        Weights[jj]=pWeight;
                    }
                }
		
	        }
        }
        protected virtual void CreateLayer()
        {
            // no-op in the base class; derived layer classes are expected to override this
            // to build their neurons, connections, and weights
        }
    }
}
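
As a rough, self-contained sketch (independent of the ANN.Perceptron classes above, so every identifier in it is illustrative rather than part of the article's library), the following console program walks through the same arithmetic on a single fully connected neuron: the weighted sum is squashed with the 1.7159 * tanh(2/3 * x) activation, and one gradient step is taken with a plain learning rate, omitting the diagonal-Hessian scaling used by Backpropagate().

using System;

// Toy demonstration only: one neuron, two inputs plus a bias weight, one training step
// toward a target output.  It mirrors the arithmetic in CommonLayer.Calculate() and
// CommonLayer.Backpropagate(), but none of these identifiers belong to the library above.
static class TinyLayerDemo
{
    const double A = 1.7159, B = 0.66666667;

    static double Sigmoid(double x) { return A * Math.Tanh(B * x); }
    // derivative expressed in terms of the sigmoid's output S, exactly as DSIGMOID() does
    static double DSigmoid(double s) { return B / A * (A + s) * (A - s); }

    static void Main()
    {
        double[] x = { 0.5, -0.25 };       // previous-layer outputs (Xnm1)
        double[] w = { 0.1, 0.4, -0.3 };   // w[0] is the bias weight
        double target = 0.8, eta = 0.05;

        // forward pass: Yn = bias + sum_k w_k * x_k ; Xn = F(Yn)
        double y = w[0];
        for (int k = 0; k < x.Length; k++) y += w[k + 1] * x[k];
        double output = Sigmoid(y);

        // backward pass for the squared error 0.5 * (output - target)^2:
        // dErr_wrt_dXn = output - target ; dErr_wrt_dYn = F'(Yn) * dErr_wrt_dXn
        double dErr_dXn = output - target;
        double dErr_dYn = DSigmoid(output) * dErr_dXn;

        // the gradient for each weight is the connected input times dErr_wrt_dYn
        // (input 1.0 for the bias), followed by a plain gradient step
        w[0] -= eta * dErr_dYn * 1.0;
        for (int k = 0; k < x.Length; k++) w[k + 1] -= eta * dErr_dYn * x[k];

        Console.WriteLine("output = " + output.ToString("F4") + ", updated bias = " + w[0].ToString("F4"));
    }
}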

License

This article, along with any associated source code and files, is licensed under The Code Project Open License (CPOL)


Written By
Vietnam Maritime University, Vietnam