/*
Fast Artificial Neural Network Library (fann)
Copyright (C) 2003 Steffen Nissen (lukesky@diku.dk)
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
#include <stdio.h>
#include <stdlib.h>
#include <stdarg.h>
#include <string.h>
#include "config.h"
#include "fann.h"
#include "fann_errno.h"
/*#define DEBUGTRAIN*/
#ifndef FIXEDFANN
/* INTERNAL FUNCTION
Calculates the derivative of the activation function at a given
neuron value, for an activation function and a steepness
*/
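/* For reference, the derivatives applied below, expressed in terms of the
   neuron output y and the steepness s (a sketch of the math, not of the
   exact fann_*_derive macros):
     FANN_LINEAR:                            dy/dx = s
     FANN_SIGMOID (y = 1/(1+e^(-2sx))):      dy/dx = 2*s*y*(1-y)
     FANN_SIGMOID_SYMMETRIC (y = tanh(s*x)): dy/dx = s*(1-y*y)
   The clipping keeps the output away from its saturation points, so the
   derivative never collapses to zero and training does not stall. */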
static fann_type fann_activation_derived(unsigned int activation_function,
fann_type steepness, fann_type value)
{
switch(activation_function){
case FANN_LINEAR:
return (fann_type)fann_linear_derive(steepness, value);
case FANN_SIGMOID:
case FANN_SIGMOID_STEPWISE:
value = fann_clip(value, 0.01f, 0.99f);
return (fann_type)fann_sigmoid_derive(steepness, value);
case FANN_SIGMOID_SYMMETRIC:
case FANN_SIGMOID_SYMMETRIC_STEPWISE:
value = fann_clip(value, -0.98f, 0.98f);
return (fann_type)fann_sigmoid_symmetric_derive(steepness, value);
default:
return 0;
}
}
/* Trains the network with the backpropagation algorithm.
*/
FANN_EXTERNAL void FANN_API fann_train(struct fann *ann, fann_type *input, fann_type *desired_output)
{
fann_run(ann, input);
fann_compute_MSE(ann, desired_output);
fann_backpropagate_MSE(ann);
fann_update_weights(ann);
}
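/* Usage sketch (hypothetical setup; the fann_create call follows the 1.x
   API and the layer sizes and data are examples only):

	struct fann *ann = fann_create(1.0f, 0.7f, 3, 2, 3, 1);
	fann_type input[2] = {0.0f, 1.0f};
	fann_type output[1] = {1.0f};
	fann_train(ann, input, output);

   Each call performs a single online (incremental) update for one pattern. */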
#endif
/* Tests the network.
*/
FANN_EXTERNAL fann_type * FANN_API fann_test(struct fann *ann, fann_type *input, fann_type *desired_output)
{
fann_type neuron_value;
fann_type *output_begin = fann_run(ann, input);
fann_type *output_it;
const fann_type *output_end = output_begin + ann->num_output;
fann_type neuron_diff;
/* calculate the error */
for(output_it = output_begin;
output_it != output_end; output_it++){
neuron_value = *output_it;
neuron_diff = (*desired_output - neuron_value);
if(ann->activation_function_output == FANN_SIGMOID_SYMMETRIC ||
ann->activation_function_output == FANN_SIGMOID_SYMMETRIC_STEPWISE){
neuron_diff /= (fann_type)2;
}
#ifdef FIXEDFANN
ann->MSE_value += (neuron_diff/(float)ann->multiplier) * (neuron_diff/(float)ann->multiplier);
#else
ann->MSE_value += (float)(neuron_diff * neuron_diff);
#endif
desired_output++;
}
ann->num_MSE++;
return output_begin;
}
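/* Usage sketch: fann_test runs the network and accumulates the squared
   error without touching the weights; it returns the output array, e.g.

	fann_type *out = fann_test(ann, input, desired_output);
	printf("output[0] = %f\n", out[0]);
*/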
/* get the mean square error.
(obsolete, will be removed at some point; use fann_get_MSE instead)
*/
FANN_EXTERNAL float FANN_API fann_get_error(struct fann *ann)
{
return fann_get_MSE(ann);
}
/* get the mean square error.
*/
FANN_EXTERNAL float FANN_API fann_get_MSE(struct fann *ann)
{
if(ann->num_MSE){
return ann->MSE_value/(float)ann->num_MSE;
}else{
return 0;
}
}
/* reset the mean square error.
(obsolete, will be removed at some point; use fann_reset_MSE instead)
*/
FANN_EXTERNAL void FANN_API fann_reset_error(struct fann *ann)
{
fann_reset_MSE(ann);
}
/* reset the mean square error.
*/
FANN_EXTERNAL void FANN_API fann_reset_MSE(struct fann *ann)
{
ann->num_MSE = 0;
ann->MSE_value = 0;
}
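/* Typical use of the four MSE helpers above (a sketch, assuming a
   struct fann_train_data loaded elsewhere with the library's helpers):

	unsigned int i;
	fann_reset_MSE(ann);
	for(i = 0; i < data->num_data; i++){
		fann_test(ann, data->input[i], data->output[i]);
	}
	printf("mean MSE over %u patterns: %f\n", data->num_data, fann_get_MSE(ann));
*/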
#ifndef FIXEDFANN
/* INTERNAL FUNCTION
Computes the error at the network output
(usually after forward propagation of an input vector with fann_run).
The error is the sum of squares over all the output units;
a counter is also incremented, because the MSE is an average of such errors.
After this train_errors in the output layer will be set to:
neuron_value_derived * (desired_output - neuron_value)
*/
void fann_compute_MSE(struct fann *ann, fann_type *desired_output)
{
fann_type neuron_value, neuron_diff, *error_it = 0, *error_begin = 0;
struct fann_neuron *last_layer_begin = (ann->last_layer-1)->first_neuron;
const struct fann_neuron *last_layer_end = last_layer_begin + ann->num_output;
const struct fann_neuron *first_neuron = ann->first_layer->first_neuron;
	/* if no room allocated for the error variables, allocate it now */
if(ann->train_errors == NULL){
ann->train_errors = (fann_type *)calloc(ann->total_neurons, sizeof(fann_type));
if(ann->train_errors == NULL){
fann_error((struct fann_error *)ann, FANN_E_CANT_ALLOCATE_MEM);
return;
}
}
	/* clear the error variables */
memset(ann->train_errors, 0, (ann->total_neurons) * sizeof(fann_type));
error_begin = ann->train_errors;
#ifdef DEBUGTRAIN
printf("\ncalculate errors\n");
#endif
/* calculate the error and place it in the output layer */
error_it = error_begin + (last_layer_begin - first_neuron);
for(; last_layer_begin != last_layer_end; last_layer_begin++){
neuron_value = last_layer_begin->value;
neuron_diff = *desired_output - neuron_value;
if(ann->activation_function_output == FANN_SIGMOID_SYMMETRIC ||
ann->activation_function_output == FANN_SIGMOID_SYMMETRIC_STEPWISE){
neuron_diff /= 2.0;
}
ann->MSE_value += (float)(neuron_diff * neuron_diff);
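		/* Non-standard error function (the "tanh" error function): the raw
		   difference d is passed through 2*atanh(d) = log((1+d)/(1-d)), which
		   punishes outputs far from the target much harder than the linear
		   error. The +/-17 clamp keeps the log finite as d approaches +/-1. */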
if(ann->train_error_function){ /* TODO make switch when more functions */
			if(neuron_diff < -.9999999)
				neuron_diff = -17.0;
			else if(neuron_diff > .9999999)
				neuron_diff = 17.0;
			else
				neuron_diff = (fann_type)log((1.0 + neuron_diff) / (1.0 - neuron_diff));
}
*error_it = fann_activation_derived(ann->activation_function_output,
ann->activation_steepness_output, neuron_value) * neuron_diff;
desired_output++;
error_it++;
}
ann->num_MSE++;
}
/* INTERNAL FUNCTION
Propagate the error backwards from the output layer.
After this the train_errors in the hidden layers will be:
neuron_value_derived * sum(outgoing_weights * train_errors_of_following_layer)
*/
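/* In the usual backpropagation notation, for a hidden neuron j with
   output y_j this computes

     delta_j = f'(y_j) * sum_k( w_jk * delta_k )

   where k runs over the neurons in the following layer that j feeds into.
   The loops below first distribute weight * delta backwards, then multiply
   by the derivative of the hidden activation function. */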
void fann_backpropagate_MSE(struct fann *ann)
{
fann_type neuron_value, tmp_error;
unsigned int i;
struct fann_layer *layer_it;
struct fann_neuron *neuron_it, *last_neuron;
fann_type *error_begin = ann->train_errors;
fann_type *error_prev_layer;
const fann_type activation_steepness_hidden = ann->activation_steepness_hidden;
const struct fann_neuron *first_neuron = ann->first_layer->first_neuron;
const struct fann_layer *second_layer = ann->first_layer + 1;
struct fann_layer *last_layer = ann->last_layer;
	/* go through all the layers, from the last to the first,
	   and propagate the error backwards */
for(layer_it = last_layer-1; layer_it > second_layer; --layer_it){
last_neuron = layer_it->last_neuron;
/* for each connection in this layer, propagate the error backwards*/
if(ann->connection_rate >= 1 && !ann->shortcut_connections){
/* optimization for fully connected networks */
/* but not shortcut connected networks */
error_prev_layer = error_begin + ((layer_it-1)->first_neuron - first_neuron);
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron];
for(i = neuron_it->num_connections ; i-- ; ){
error_prev_layer[i] += tmp_error * neuron_it->weights[i];
}
}
}else{
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron];
for(i = neuron_it->num_connections ; i-- ; ){
error_begin[neuron_it->connected_neurons[i] - first_neuron] += tmp_error * neuron_it->weights[i];
}
}
}
/* then calculate the actual errors in the previous layer */
error_prev_layer = error_begin + ((layer_it-1)->first_neuron - first_neuron);
last_neuron = (layer_it-1)->last_neuron;
switch(ann->activation_function_hidden){
case FANN_LINEAR:
for(neuron_it = (layer_it-1)->first_neuron;
neuron_it != last_neuron; neuron_it++){
neuron_value = neuron_it->value;
*error_prev_layer *= (fann_type)fann_linear_derive(activation_steepness_hidden, neuron_value);
error_prev_layer++;
}
break;
case FANN_SIGMOID:
case FANN_SIGMOID_STEPWISE:
for(neuron_it = (layer_it-1)->first_neuron;
neuron_it != last_neuron; neuron_it++){
neuron_value = neuron_it->value;
neuron_value = fann_clip(neuron_value, 0.01f, 0.99f);
*error_prev_layer *= (fann_type)fann_sigmoid_derive(activation_steepness_hidden, neuron_value);
error_prev_layer++;
}
break;
case FANN_SIGMOID_SYMMETRIC:
case FANN_SIGMOID_SYMMETRIC_STEPWISE:
for(neuron_it = (layer_it-1)->first_neuron;
neuron_it != last_neuron; neuron_it++){
neuron_value = neuron_it->value;
neuron_value = fann_clip(neuron_value, -0.98f, 0.98f);
*error_prev_layer *= (fann_type)fann_sigmoid_symmetric_derive(activation_steepness_hidden, neuron_value);
error_prev_layer++;
}
break;
default:
fann_error((struct fann_error *)ann, FANN_E_CANT_TRAIN_ACTIVATION);
return;
}
}
}
/* INTERNAL FUNCTION
Update weights for incremental training
*/
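/* Standard online gradient descent: for a connection from neuron i to
   neuron j,

     w_ij += learning_rate * delta_j * y_i

   where delta_j is the value left in train_errors by
   fann_backpropagate_MSE and y_i is the output of the presynaptic
   neuron. */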
void fann_update_weights(struct fann *ann)
{
struct fann_neuron *neuron_it, *last_neuron, *prev_neurons;
fann_type tmp_error;
struct fann_layer *layer_it;
unsigned int i;
	/* store some variables locally for fast access */
const float learning_rate = ann->learning_rate;
const struct fann_neuron *first_neuron = ann->first_layer->first_neuron;
struct fann_layer *first_layer = ann->first_layer;
const struct fann_layer *last_layer = ann->last_layer;
fann_type *error_begin = ann->train_errors;
#ifdef DEBUGTRAIN
printf("\nupdate weights\n");
#endif
for(layer_it = (first_layer+1); layer_it != last_layer; layer_it++){
#ifdef DEBUGTRAIN
printf("layer[%d]\n", layer_it - first_layer);
#endif
last_neuron = layer_it->last_neuron;
if(ann->connection_rate >= 1 && !ann->shortcut_connections){
/* optimization for fully connected networks */
/* but not shortcut connected networks */
prev_neurons = (layer_it-1)->first_neuron;
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron] * learning_rate;
for(i = neuron_it->num_connections ; i-- ; ){
neuron_it->weights[i] += tmp_error * prev_neurons[i].value;
}
}
}else{
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron] * learning_rate;
for(i = neuron_it->num_connections ; i-- ; ){
neuron_it->weights[i] += tmp_error * neuron_it->connected_neurons[i]->value;
}
}
}
}
}
/* INTERNAL FUNCTION
Update slopes for batch training
*/
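/* Instead of updating the weights directly, batch-style training
   accumulates the per-pattern gradient contribution delta_j * y_i into
   train_slopes; one of the fann_update_weights_* functions below then
   turns the accumulated slopes into an actual weight change. */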
void fann_update_slopes_batch(struct fann *ann)
{
struct fann_neuron *neuron_it, *last_neuron, *prev_neurons;
fann_type tmp_error, *weights_begin;
struct fann_layer *layer_it;
unsigned int i;
	/* store some variables locally for fast access */
const struct fann_neuron *first_neuron = ann->first_layer->first_neuron;
struct fann_layer *first_layer = ann->first_layer;
const struct fann_layer *last_layer = ann->last_layer;
fann_type *error_begin = ann->train_errors;
fann_type *slope_begin, *neuron_slope;
	/* if no room allocated for the slope variables, allocate it now */
if(ann->train_slopes == NULL){
ann->train_slopes = (fann_type *)calloc(ann->total_connections, sizeof(fann_type));
if(ann->train_slopes == NULL){
fann_error((struct fann_error *)ann, FANN_E_CANT_ALLOCATE_MEM);
return;
}
memset(ann->train_slopes, 0, (ann->total_connections) * sizeof(fann_type));
}
slope_begin = ann->train_slopes;
weights_begin = fann_get_weights(ann);
#ifdef DEBUGTRAIN
printf("\nupdate slopes\n");
#endif
for(layer_it = (first_layer+1); layer_it != last_layer; layer_it++){
#ifdef DEBUGTRAIN
printf("layer[%d]\n", layer_it - first_layer);
#endif
last_neuron = layer_it->last_neuron;
if(ann->connection_rate >= 1 && !ann->shortcut_connections){
/* optimization for fully connected networks */
/* but not shortcut connected networks */
prev_neurons = (layer_it-1)->first_neuron;
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron];
neuron_slope = slope_begin + (neuron_it->weights - weights_begin);
for(i = neuron_it->num_connections ; i-- ; ){
neuron_slope[i] += tmp_error * prev_neurons[i].value;
}
}
}else{
for(neuron_it = layer_it->first_neuron;
neuron_it != last_neuron; neuron_it++){
tmp_error = error_begin[neuron_it - first_neuron];
neuron_slope = slope_begin + (neuron_it->weights - weights_begin);
for(i = neuron_it->num_connections ; i-- ; ){
neuron_slope[i] += tmp_error * neuron_it->connected_neurons[i]->value;
}
}
}
}
}
/* INTERNAL FUNCTION
Clears arrays used for training before a new training session.
Also creates the arrays that do not exist yet.
*/
void fann_clear_train_arrays(struct fann *ann)
{
unsigned int i;
	/* if no room allocated for the slope variables, allocate it now */
if(ann->train_slopes == NULL){
ann->train_slopes = (fann_type *)calloc(ann->total_connections, sizeof(fann_type));
if(ann->train_slopes == NULL){
fann_error((struct fann_error *)ann, FANN_E_CANT_ALLOCATE_MEM);
return;
}
}
memset(ann->train_slopes, 0, (ann->total_connections) * sizeof(fann_type));
	/* if no room allocated for the previous step variables, allocate it now */
if(ann->prev_steps == NULL){
ann->prev_steps = (fann_type *)calloc(ann->total_connections, sizeof(fann_type));
if(ann->prev_steps == NULL){
fann_error((struct fann_error *)ann, FANN_E_CANT_ALLOCATE_MEM);
return;
}
}
memset(ann->prev_steps, 0, (ann->total_connections) * sizeof(fann_type));
	/* if no room allocated for the previous slope variables, allocate it now */
if(ann->prev_train_slopes == NULL){
ann->prev_train_slopes = (fann_type *)calloc(ann->total_connections, sizeof(fann_type));
if(ann->prev_train_slopes == NULL){
fann_error((struct fann_error *)ann, FANN_E_CANT_ALLOCATE_MEM);
return;
}
}
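	/* RPROP adapts its step sizes from the sign of prev_train_slopes, so
	   seed them with a small non-zero value; with an all-zero seed the
	   sign product in the first update would be zero and no step would
	   be taken. */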
if(ann->training_algorithm == FANN_TRAIN_RPROP){
for(i = 0; i < ann->total_connections; i++){
ann->prev_train_slopes[i] = (fann_type)0.0125;
}
} else {
memset(ann->prev_train_slopes, 0, (ann->total_connections) * sizeof(fann_type));
}
}
/* INTERNAL FUNCTION
Update weights for batch training
*/
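/* The accumulated slope for each weight is the gradient summed over the
   whole epoch, so

     w += (learning_rate / num_data) * slope

   is one averaged gradient step per epoch. The slopes are cleared so the
   next epoch starts from zero. */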
void fann_update_weights_batch(struct fann *ann, unsigned int num_data)
{
fann_type *train_slopes = ann->train_slopes;
fann_type *weights = fann_get_weights(ann);
const float epsilon = ann->learning_rate/num_data;
unsigned int i = ann->total_connections;
while(i--){
weights[i] += train_slopes[i] * epsilon;
train_slopes[i] = 0.0;
}
}
/* INTERNAL FUNCTION
The quickprop training algorithm
*/
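/* Follows Fahlman's quickprop rule: each weight is treated independently
   and the error curve is approximated by a parabola through the last two
   slope measurements, giving the quadratic step

     next_step = prev_step * slope / (prev_slope - slope)

   limited by mu (the "maximum growth factor"), plus a plain gradient term
   epsilon * slope when the current slope still points the same way. The
   decay term pulls weights towards zero to keep them bounded. */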
void fann_update_weights_quickprop(struct fann *ann, unsigned int num_data)
{
fann_type *train_slopes = ann->train_slopes;
fann_type *weights = fann_get_weights(ann);
fann_type *prev_steps = ann->prev_steps;
fann_type *prev_train_slopes = ann->prev_train_slopes;
fann_type w, prev_step, slope, prev_slope, next_step;
float epsilon = ann->learning_rate/num_data;
	float decay = ann->quickprop_decay; /* default -0.0001 */
	float mu = ann->quickprop_mu; /* default 1.75 */
float shrink_factor = (float)(mu / (1.0 + mu));
unsigned int i = ann->total_connections;
while(i--){
w = weights[i];
prev_step = prev_steps[i];
slope = train_slopes[i] + decay * w;
prev_slope = prev_train_slopes[i];
next_step = 0.0;
		/* The step must always be in the direction opposite to the slope. */
if(prev_step > 0.001) {
/* If last step was positive... */
if(slope > 0.0) {
/* Add in linear term if current slope is still positive.*/
next_step += epsilon * slope;
}
			/* If current slope is close to or larger than prev slope... */
			if(slope > (shrink_factor * prev_slope)) {
				next_step += mu * prev_step; /* Take maximum size positive step. */
} else {
next_step += prev_step * slope / (prev_slope - slope); /* Else, use quadratic estimate. */
}
} else if(prev_step < -0.001){
/* If last step was negative... */
if(slope < 0.0){
/* Add in linear term if current slope is still negative.*/
next_step += epsilon * slope;
}
			/* If current slope is close to or more negative than prev slope... */
if(slope < (shrink_factor * prev_slope)){
next_step += mu * prev_step; /* Take maximum size negative step. */
} else {
next_step += prev_step * slope / (prev_slope - slope); /* Else, use quadratic estimate. */
}
} else {
/* Last step was zero, so use only linear term. */
next_step += epsilon * slope;
}
/* update global data arrays */
prev_steps[i] = next_step;
weights[i] = w + next_step;
prev_train_slopes[i] = slope;
train_slopes[i] = 0.0;
}
}
/* INTERNAL FUNCTION
The iRprop- algorithm
*/
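/* iRprop- (Igel and Huesken's variant of RPROP): only the sign of the
   gradient is used. While the slope keeps its sign the step size grows by
   increase_factor; on a sign change the step size shrinks by
   decrease_factor and the slope is zeroed, so the following iteration
   neither grows nor shrinks the step. Step sizes are clamped to
   [delta_min, delta_max]. */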
void fann_update_weights_irpropm(struct fann *ann, unsigned int num_data)
{
fann_type *train_slopes = ann->train_slopes;
fann_type *weights = fann_get_weights(ann);
fann_type *prev_steps = ann->prev_steps;
fann_type *prev_train_slopes = ann->prev_train_slopes;
fann_type prev_step, slope, prev_slope, next_step, same_sign;
	/* RPROP parameters, read from the network */
	float increase_factor = ann->rprop_increase_factor; /* default 1.2 */
	float decrease_factor = ann->rprop_decrease_factor; /* default 0.5 */
	float delta_min = ann->rprop_delta_min; /* default 0.0 */
	float delta_max = ann->rprop_delta_max; /* default 50.0 */
unsigned int i = ann->total_connections;
while(i--){
prev_step = fann_max(prev_steps[i], (fann_type)0.001); /* prev_step may not be zero because then the training will stop */
slope = train_slopes[i];
prev_slope = prev_train_slopes[i];
next_step = 0.0;
same_sign = prev_slope * slope;
if(same_sign > 0.0) {
next_step = fann_min(prev_step * increase_factor, delta_max);
} else if(same_sign < 0.0) {
next_step = fann_max(prev_step * decrease_factor, delta_min);
slope = 0;
}
if(slope < 0){
weights[i] -= next_step;
}else{
weights[i] += next_step;
}
/* update global data arrays */
prev_steps[i] = next_step;
prev_train_slopes[i] = slope;
train_slopes[i] = 0.0;
}
}
#endif