using System;
using ANN.Perceptron.ArchiveSerialization;
using System.Drawing;
using System.Threading;
using System.Threading.Tasks;
using ANN.Perceptron.Common;
using ANN.Perceptron.Connections;
using ANN.Perceptron.Neurons;
using ANN.Perceptron.Weights;
namespace ANN.Perceptron.Layers
{
// Layer class
public class CommonLayer : NetworkProvider
{
protected string label;
protected CommonLayer prevLayer;
public CommonLayer PrevLayer
{
get
{
return prevLayer;
}
set
{
if (prevLayer == value)
return;
prevLayer = value;
}
}
protected int neuronCount;
protected Weight[] weights;
protected int weightCount;
protected ActivationFunction.SigmoidFunction sigmoid;
protected bool floatingPointWarning; // flag for one-time warning (per layer) about potential floating point overflow
protected Size featureMapSize;
protected int nFeatureMaps;
protected LayerTypes type;
protected Neuron[] neurons;
public LayerTypes LayerType
{
get
{
return type;
}
set
{
if (type == value)
return;
type = value;
}
}
public string Label
{
get
{
return label;
}
set
{
if (label == value)
return;
label = value;
}
}
public int FeatureMapCount
{
get
{
return nFeatureMaps;
}
set
{
if (nFeatureMaps == value)
return;
nFeatureMaps = value;
}
}
public int WeightCount
{
get
{
return weightCount;
}
set
{
if (weightCount == value)
return;
weightCount = value;
}
}
public int NeuronCount
{
get
{
return neuronCount;
}
set
{
if (neuronCount == value)
return;
neuronCount = value;
}
}
public Weight[] Weights
{
get
{
return weights;
}
set
{
if (weights == value)
return;
weights = value;
}
}
public Neuron[] Neurons
{
get
{
return neurons;
}
set
{
if (neurons == value)
return;
neurons = value;
}
}
public Size FeatureMapSize
{
get
{
return featureMapSize;
}
set
{
if (featureMapSize == value)
return;
featureMapSize = value;
}
}
public CommonLayer() : base()
{
label = "";
prevLayer = null;
sigmoid = new ActivationFunction.SigmoidFunction();
Weights = null;
Neurons = null;
weightCount = 0;
neuronCount = 0;
}
public CommonLayer(string sLabel, CommonLayer prelayer)
: this()
{
label = sLabel;
prevLayer = prelayer;
}
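// Illustrative usage (a minimal sketch, not part of the original code; the layer labels are
// hypothetical). Layers are chained through the prevLayer reference passed to the constructor,
// and Initialize() is called once the chain is wired up; derived classes are presumably expected
// to override CreateLayer() to build their neurons, connections, and weights.
//
//   var input  = new CommonLayer("Layer00", null);    // first layer has no predecessor
//   var hidden = new CommonLayer("Layer01", input);   // later layers point back one layer
//   input.Initialize();
//   hidden.Initialize();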
public virtual void Initialize()
{
floatingPointWarning = false;
CreateLayer();
}
// Sigmoid activation function: a scaled hyperbolic tangent, F(x) = 1.7159 * tanh(2x/3)
public double SIGMOID(double x)
{
return (1.7159 * System.Math.Tanh(0.66666667 * x));
}
/// <summary>
/// Derivative of the sigmoid, expressed as a function of the sigmoid's output (not its input).
/// </summary>
/// <param name="S">Output value previously returned by SIGMOID.</param>
/// <returns>The derivative of the sigmoid evaluated at the point whose output is S.</returns>
public double DSIGMOID(double S)
{
return (0.66666667 / 1.7159 * (1.7159 + (S)) * (1.7159 - (S)));
}
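// Sanity-check sketch (purely illustrative, not part of the original code). DSIGMOID takes the
// *output* S of SIGMOID, not its input x: for F(x) = 1.7159 * tanh(2x/3),
// F'(x) = (2/3) * 1.7159 * (1 - tanh^2(2x/3)) = (0.66666667 / 1.7159) * (1.7159 + F(x)) * (1.7159 - F(x)),
// which is exactly the expression computed above. A quick finite-difference check:
//
//   double x = 0.5;
//   double s = SIGMOID(x);                                            // forward value
//   double analytic = DSIGMOID(s);                                    // derivative from the output
//   double numeric = (SIGMOID(x + 1e-6) - SIGMOID(x - 1e-6)) / 2e-6;  // central difference
//   // analytic and numeric should agree to roughly six decimal places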
public void Calculate()
{
if (prevLayer != null)
{
Parallel.ForEach(Neurons,ParallelOption, nit =>
{
double dSum = 0;
foreach (var cit in nit.Connections)
{
if (cit == nit.Connections[0])
{
// the first connection carries the bias weight (implicit input of 1)
dSum = (weights[(int)cit.WeightIndex].value);
}
else
{
dSum += (weights[(int)cit.WeightIndex].value) * (prevLayer.Neurons[(int)cit.NeuronIndex].output);
}
}
nit.output = SIGMOID(dSum);
});
}
}
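// Forward-pass sketch (illustrative only; the "layers" collection and its wiring are assumed to
// exist in the surrounding network code). Calculate() computes, for each neuron,
// output = SIGMOID(w_bias + sum_i w_i * x_i), where the bias comes from the neuron's first
// connection and the x_i are the previous layer's outputs, so a driver simply walks the layers
// from input to output:
//
//   foreach (var layer in layers)   // ordered front to back
//   {
//       layer.Calculate();          // reads prevLayer.Neurons[...].output, writes its own outputs
//   }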
/////////////
public void Backpropagate(double[] dErr_wrt_dXn /* in */,
double[] dErr_wrt_dXnm1 /* out */,
NeuronOutputs thisLayerOutput, // memorized values of this layer's output
NeuronOutputs prevLayerOutput, // memorized values of previous layer's output
double etaLearningRate)
{
// nomenclature (repeated from NeuralNetwork class):
//
// Err is output error of the entire neural net
// Xn is the output vector on the n-th layer
// Xnm1 is the output vector of the previous layer
// Wn is the vector of weights of the n-th layer
// Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
// F is the squashing function: Xn = F(Yn)
// F' is the derivative of the squashing function
// Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
try
{
int nIndex;
double[] dErr_wrt_dYn = new double[neuronCount];
//
// std::vector< double > dErr_wrt_dWn( m_Weights.size(), 0.0 ); // important to initialize to zero
//////////////////////////////////////////////////
//
///// DESIGN TRADEOFF: REVIEW !!
// We would prefer (for ease of coding) to use STL vector for the array "dErr_wrt_dWn", which is the
// differential of the current pattern's error wrt weights in the layer. However, for layers with
// many weights, such as fully-connected layers, there are also many weights. The STL vector
// class's allocator is remarkably stupid when allocating large memory chunks, and causes a remarkable
// number of page faults, with a consequent slowing of the application's overall execution time.
// To fix this, I tried using a plain-old C array, by new'ing the needed space from the heap, and
// delete[]'ing it at the end of the function. However, this caused the same number of page-fault
// errors, and did not improve performance.
// So I tried a plain-old C array allocated on the stack (i.e., not the heap). Of course I could not
// write a statement like
// double dErr_wrt_dWn[ m_Weights.size() ];
// since the compiler insists upon a compile-time known constant value for the size of the array.
// To avoid this requirement, I used the _alloca function, to allocate memory on the stack.
// The downside of this is excessive stack usage, and there might be stack overflow problems. That's why
// this comment is labeled "REVIEW"
double[] dErr_wrt_dWn = new double[weightCount];
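// note: in .NET, new double[weightCount] is already zero-initialized; the explicit loop below
// simply mirrors the initialization step of the original C++ code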
Parallel.For(0, weightCount,ParallelOption, ii =>
{
dErr_wrt_dWn[ii] = 0.0;
});
//ParallelOption.MaxDegreeOfParallelism = 1;
bool bMemorized = (thisLayerOutput != null) && (prevLayerOutput != null);
// calculate dErr_wrt_dYn = F'(Yn) * dErr_wrt_Xn
Parallel.For(0, neuronCount,ParallelOption, ii =>
{
double output;
if (bMemorized)
{
output = thisLayerOutput[ii];
}
else
{
output = Neurons[ii].output;
}
dErr_wrt_dYn[ii] = (DSIGMOID(output) * dErr_wrt_dXn[ii]);
});
// calculate dErr_wrt_Wn = Xnm1 * dErr_wrt_Yn
// For each neuron in this layer, go through the list of connections from the prior layer, and
// update the differential for the corresponding weight
Parallel.For(0, neuronCount,ParallelOption, index =>
{
var nit = neurons[index];
foreach (Connection cit in nit.Connections)
{
double output;
uint kk = cit.NeuronIndex;
if (kk == 0xffffffff)
{
output = 1.0; // this is the bias weight
}
else
{
if (bMemorized)
{
output = prevLayerOutput[(int)kk];
}
else
{
output = prevLayer.Neurons[(int)kk].output;
}
}
dErr_wrt_dWn[cit.WeightIndex] = dErr_wrt_dYn[index] * output;
}
});
// calculate dErr_wrt_Xnm1 = Wn * dErr_wrt_dYn, which is needed as the input value of
// dErr_wrt_Xn for backpropagation of the next (i.e., previous) layer
// For each neuron in this layer
Parallel.For(0, neuronCount, ParallelOption, index =>
{
var nit = neurons[index];
foreach (Connection cit in nit.Connections)
{
uint kk = cit.NeuronIndex;
if (kk != 0xffffffff)
{
// we exclude uint.MaxValue (0xffffffff), which signifies the phantom bias neuron with
// constant output of "1", since we cannot train the bias neuron
nIndex = (int)kk;
dErr_wrt_dXnm1[nIndex] += dErr_wrt_dYn[index] * Weights[(int)cit.WeightIndex].value;
}
}
});
// finally, update the weights of this layer using dErr_wrt_dWn and the learning rate eta.
// The original comment calls for an atomic compare-and-exchange, since another thread might be
// in the middle of backpropagation and the weights might shift slightly; the update below is a
// plain read-modify-write (an atomic version is sketched after this loop).
Parallel.For(0, weightCount,ParallelOption, j =>
{
// 0.10 is the damping constant ("dMicron"); it keeps the effective learning rate bounded
// when the diagonal Hessian estimate is close to zero
weights[j].value -= (etaLearningRate / (weights[j].diagHessian + 0.10)) * dErr_wrt_dWn[j];
});
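// Sketch of the atomic alternative mentioned above (an illustration, not the original
// implementation). The body of the Parallel.For above could use Interlocked.CompareExchange so
// that a concurrent writer's update is never silently overwritten, assuming Weight.value is a
// mutable double field:
//
//   double oldValue, newValue;
//   do
//   {
//       oldValue = weights[j].value;
//       newValue = oldValue - (etaLearningRate / (weights[j].diagHessian + 0.10)) * dErr_wrt_dWn[j];
//   } while (Interlocked.CompareExchange(ref weights[j].value, newValue, oldValue) != oldValue);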
}
catch (Exception)
{
// swallow the exception and abandon this pattern's weight update
return;
}
}
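// Training-loop sketch (hypothetical driver code; the "layers" array, the output-layer gradient
// dErr_wrt_dXlast, and etaLearningRate are assumed to live in the surrounding NeuralNetwork class).
// Backpropagate() consumes the error gradient with respect to this layer's outputs and produces
// the gradient with respect to the previous layer's outputs, so layers are visited back to front:
//
//   double[] dErr_wrt_dXn = dErr_wrt_dXlast;
//   for (int i = layers.Length - 1; i > 0; i--)   // stop before the input layer
//   {
//       var dErr_wrt_dXnm1 = new double[layers[i - 1].NeuronCount];
//       layers[i].Backpropagate(dErr_wrt_dXn, dErr_wrt_dXnm1, null, null, etaLearningRate);
//       dErr_wrt_dXn = dErr_wrt_dXnm1;            // becomes the "in" gradient one layer back
//   }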
public void PeriodicWeightSanityCheck()
{
// called periodically by the neural net, to request a check on the "reasonableness" of the
// weights. The warning message is given only once per layer
if (weights != null)
{
Parallel.ForEach(weights,ParallelOption, wit =>
{
double val = System.Math.Abs(wit.value);
if ((val > 100.0) && (floatingPointWarning == false))
{
// 100.0 is an arbitrary value, that no reasonable weight should ever exceed
/*
string strMess = ""; ;
strMess.Format("Caution: Weights are becoming unboundedly large \n"+
"Layer: %s \nWeight: %s \nWeight value = %g \nWeight Hessian = %g\n\n"+
"Suggest abandoning this backpropagation and investigating",
label, wit.label, wit.value, wit.diagHessian );
//show message box
//MessageBox.show( NULL, strMess, _T( "Problem With Weights" ), MB_ICONEXCLAMATION | MB_OK );
*/
floatingPointWarning = true;
}
});
}
}
public void EraseHessianInformation()
{
// goes through all the weights associated with this layer, and sets each of their
// diagHessian value to zero
if (Weights != null)
{
Parallel.ForEach(Weights, ParallelOption,wit =>
{
wit.diagHessian = 0.0;
});
}
}
public void DivideHessianInformationBy(double divisor)
{
// goes through all the weights associated with this layer, and divides each of their
// diagHessian value by the indicated divisor
if (Weights != null)
{
Parallel.ForEach(Weights,ParallelOption, wit =>
{
double dTemp;
dTemp = wit.diagHessian;
if (dTemp < 0.0)
{
// it should not be possible to reach here, since all calculations for the second
// derivative are strictly zero-positive. However, there are some early indications
// that this check is necessary anyway
dTemp = 0.0;
}
wit.diagHessian = dTemp / divisor;
});
}
}
public void BackpropagateSecondDerivatives(double[] d2Err_wrt_dXn /* in */,
double[] d2Err_wrt_dXnm1 /* out */)
{
// nomenclature (repeated from NeuralNetwork class)
// NOTE: even though we are addressing SECOND derivatives ( and not first derivatives),
// we use nearly the same notation as if there were first derivatives, since otherwise the
// ASCII look would be confusing. We add one "2" but not two "2's", such as "d2Err_wrt_dXn",
// to give a gentle emphasis that we are using second derivatives
//
// Err is output error of the entire neural net
// Xn is the output vector on the n-th layer
// Xnm1 is the output vector of the previous layer
// Wn is the vector of weights of the n-th layer
// Yn is the activation value of the n-th layer, i.e., the weighted sum of inputs BEFORE the squashing function is applied
// F is the squashing function: Xn = F(Yn)
// F' is the derivative of the squashing function
// Conveniently, for F = tanh, then F'(Yn) = 1 - Xn^2, i.e., the derivative can be calculated from the output, without knowledge of the input
var d2Err_wrt_dYn = new double[neuronCount];
//
// std::vector< double > d2Err_wrt_dWn( m_Weights.size(), 0.0 ); // important to initialize to zero
//////////////////////////////////////////////////
//
///// DESIGN TRADEOFF: REVIEW !!
//
// Note that the reasoning of this comment is identical to that in Backpropagate() above,
// from which this BackpropagateSecondDerivatives() function is derived
//
// We would prefer (for ease of coding) to use STL vector for the array "d2Err_wrt_dWn", which is the
// second differential of the current pattern's error wrt weights in the layer. However, for layers with
// many weights, such as fully-connected layers, there are also many weights. The STL vector
// class's allocator is remarkably stupid when allocating large memory chunks, and causes a remarkable
// number of page faults, with a consequent slowing of the application's overall execution time.
// To fix this, I tried using a plain-old C array, by new'ing the needed space from the heap, and
// delete[]'ing it at the end of the function. However, this caused the same number of page-fault
// errors, and did not improve performance.
// So I tried a plain-old C array allocated on the stack (i.e., not the heap). Of course I could not
// write a statement like
// double d2Err_wrt_dWn[ m_Weights.size() ];
// since the compiler insists upon a compile-time known constant value for the size of the array.
// To avoid this requirement, I used the _alloca function, to allocate memory on the stack.
// The downside of this is excessive stack usage, and there might be stack overflow problems. That's why
// this comment is labeled "REVIEW"
double[] d2Err_wrt_dWn = new double[weightCount];
Parallel.For(0, weightCount,ParallelOption, ii =>
{
d2Err_wrt_dWn[ii] = 0.0;
});
// calculate d2Err_wrt_dYn = ( F'(Yn) )^2 * dErr_wrt_Xn (where dErr_wrt_Xn is actually a second derivative )
Parallel.For(0, neuronCount,ParallelOption, ii =>
{
double output;
double dTemp;
output = Neurons[ii].output;
dTemp = ActivationFunction.SigmoidFunction.DSIGMOID(output);
d2Err_wrt_dYn[ii] = (d2Err_wrt_dXn[ii] * dTemp * dTemp);
});
// calculate d2Err_wrt_Wn = ( Xnm1 )^2 * d2Err_wrt_Yn (where d2Err_wrt_Yn is actually a second derivative)
// For each neuron in this layer, go through the list of connections from the prior layer, and
// update the differential for the corresponding weight
Parallel.For(0, neuronCount,ParallelOption, i =>
{
var nit = neurons[i];
foreach (var cit in nit.Connections)
{
try
{
double output;
uint kk = (uint)cit.NeuronIndex;
if (kk == 0xffffffff)
{
output = 1.0; // this is the bias connection; implied neuron output of "1"
}
else
{
output = prevLayer.Neurons[(int)kk].output;
}
// ASSERT( (*cit).WeightIndex < d2Err_wrt_dWn.size() ); // disabled: after changing d2Err_wrt_dWn to a plain array, size() is no longer available
//d2Err_wrt_dWn[cit.WeightIndex] += d2Err_wrt_dYn[ii] * output * output;
d2Err_wrt_dWn[cit.WeightIndex] = d2Err_wrt_dYn[i] * output * output;
}
catch (Exception)
{
// ignore this connection and continue with the next one
}
}
});
// calculate d2Err_wrt_Xnm1 = ( Wn )^2 * d2Err_wrt_dYn (where d2Err_wrt_dYn is a second derivative not a first).
// d2Err_wrt_Xnm1 is needed as the input value of
// d2Err_wrt_Xn for backpropagation of second derivatives for the next (i.e., previous spatially) layer
// For each neuron in this layer
Parallel.For(0, neuronCount,ParallelOption, i =>
{
var nit = neurons[i];
foreach (var cit in nit.Connections)
{
try
{
uint kk = cit.NeuronIndex;
if (kk != 0xffffffff)
{
// we exclude uint.MaxValue (0xffffffff), which signifies the phantom bias neuron with
// constant output of "1", since we cannot train the bias neuron
int nIndex;
nIndex = (int)kk;
double dTemp;
dTemp = Weights[(int)cit.WeightIndex].value;
dTemp *= dTemp;
d2Err_wrt_dXnm1[nIndex] += d2Err_wrt_dYn[i] * dTemp;
}
}
catch (Exception)
{
// abandon the remaining connections for this neuron
return;
}
}
});
// finally, update the diagonal Hessians for the weights of this layer using d2Err_wrt_dWn.
// By design, this function (and its iteration over many, approximately 500, patterns) is called while a
// single thread has locked the neural network, so there is no possibility that another
// thread might change the value of the Hessian. The original comment nevertheless suggests an
// atomic compare-and-exchange; the update below is a plain read-modify-write, which is safe
// under the single-writer assumption above.
Parallel.For(0, weightCount, ParallelOption, jj =>
{
double oldValue, newValue;
oldValue = Weights[jj].diagHessian;
newValue = oldValue + d2Err_wrt_dWn[jj];
Weights[jj].diagHessian = newValue;
});
}
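// Hessian-estimation sketch (hypothetical driver code; the surrounding NeuralNetwork class is
// assumed). The diagonal Hessian is estimated by accumulating second derivatives over a sample
// of patterns (roughly 500, per the comment above) and then averaging, which is why the three
// Hessian helpers are used in this order:
//
//   foreach (var layer in layers) layer.EraseHessianInformation();
//   for (int p = 0; p < numPatterns; p++)
//   {
//       // forward-propagate pattern p, compute d2Err_wrt_dXlast for the output layer,
//       // then call BackpropagateSecondDerivatives() on each layer, back to front
//   }
//   foreach (var layer in layers) layer.DivideHessianInformationBy(numPatterns);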
override public void Serialize(Archive ar)
{
int ii, jj;
if (ar.IsStoring())
{
// write layer's label
ar.Write(label);
// write layer type
ar.Write((int)type);
// write neuron count
ar.Write(neuronCount);
// write weight count
ar.Write(weightCount);
// write feature map count
ar.Write(nFeatureMaps);
//write size of feature map
ar.Write(featureMapSize.Width);
ar.Write(featureMapSize.Height);
foreach (Neuron nit in Neurons)
{
ar.Write(nit.label);
ar.Write(nit.ConnectionCount);
if (nit.Connections != null && nit.ConnectionCount > 0)
{
foreach (Connection cit in nit.Connections)
{
ar.Write(cit.NeuronIndex);
ar.Write(cit.WeightIndex);
}
}
}
if (weights != null && WeightCount > 0)
{
foreach (Weight wit in Weights)
{
ar.Write(wit.label);
ar.Write(wit.value);
}
}
}
else
{
string str;
// read layer's label
ar.Read(out str);
label = str;
// read layer type
int iType;
ar.Read(out iType);
type = (LayerTypes)iType;
int iNumNeurons, iNumWeights, iNumConnections;
double value;
Neuron pNeuron;
Weight pWeight;
// read neuron and weight counts
ar.Read(out iNumNeurons);
ar.Read(out iNumWeights);
neuronCount = iNumNeurons;
weightCount = iNumWeights;
int iFeatureMaps, iWidth, iHeight;
// read feature map count
ar.Read(out iFeatureMaps);
nFeatureMaps = iFeatureMaps;
//read size of feature map
ar.Read(out iWidth);
ar.Read(out iHeight);
featureMapSize.Width = iWidth;
featureMapSize.Height = iHeight;
//read connections and weights
if (iNumNeurons != 0)
{
// discard any existing neurons and weights and allocate new arrays
Neurons = new Neuron[iNumNeurons];
Weights = new Weight[iNumWeights];
for (ii = 0; ii < iNumNeurons; ii++)
{
// read the neuron's label
ar.Read(out str);
// read the neuron's connection count
ar.Read(out iNumConnections);
pNeuron = new Neuron(str, iNumConnections);
pNeuron.ConnectionCount = iNumConnections;
pNeuron.label = str;
Neurons[ii] = pNeuron;
for (jj = 0; jj < iNumConnections; jj++)
{
var conn = new Connection();
ar.Read(out conn.NeuronIndex);
ar.Read(out conn.WeightIndex);
pNeuron.AddConnection(conn, jj);
}
}
for (jj = 0; jj < iNumWeights; jj++)
{
ar.Read(out str);
ar.Read(out value);
pWeight = new Weight(str, value);
Weights[jj] = pWeight;
}
}
}
}
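// Record layout written and read by Serialize(), in order (derived directly from the code above):
//   label, (int)type, neuronCount, weightCount, nFeatureMaps, featureMapSize.Width, featureMapSize.Height,
//   then per neuron: label, ConnectionCount, and per connection: NeuronIndex, WeightIndex,
//   then per weight: label, value.
// Loading reads the same fields in the same order and rebuilds the neuron and weight arrays from them.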
protected virtual void CreateLayer()
{
}
}
}