using System;

// See "A Direct Adaptive Method for Faster Backpropagation Learning: The RPROP Algorithm",
// M. Riedmiller and H. Braun,
// Proceedings of the 1993 IEEE International Conference on Neural Networks,
// pp. 586-591.
// This is the original version of the algorithm. There are many later variations.
// (c) Dr. James McCaffrey
//
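// Demo outline: generate synthetic classification data from a randomly weighted
// "teacher" network, split it 80/20 into train and test matrices, train a 4-5-3
// network (tanh hidden layer, softmax output) with batch RPROP, then report
// classification accuracy on both sets.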
namespace ResilientBackProp
{
  class RpropProgram
  {
    static void Main(string[] args)
    {
      Console.WriteLine("\nBegin neural network with Resilient Back-Propagation (RPROP) training demo");

      int numInput = 4; // number of features
      int numHidden = 5;
      int numOutput = 3; // number of classes for Y
      int numRows = 10000;

      Console.WriteLine("\nGenerating " + numRows +
        " artificial data items with " + numInput + " features");
      double[][] allData = MakeAllData(numInput, numHidden, numOutput, numRows);
      Console.WriteLine("Done");

      Console.WriteLine("\nCreating train (80%) and test (20%) matrices");
      double[][] trainData;
      double[][] testData;
      MakeTrainTest(allData, 0.80, out trainData, out testData);
      Console.WriteLine("Done");

      Console.WriteLine("\nTraining data: \n");
      ShowData(trainData, 4, 2, true);
      Console.WriteLine("Test data: \n");
      ShowData(testData, 3, 2, true);

      Console.WriteLine("Creating a 4-5-3 neural network");
      NeuralNetwork nn = new NeuralNetwork(numInput, numHidden, numOutput);

      int maxEpochs = 1000;
      Console.WriteLine("\nSetting maxEpochs = " + maxEpochs);

      Console.WriteLine("\nStarting RPROP training");
      double[] weights = nn.TrainRPROP(trainData, maxEpochs); // RPROP
      Console.WriteLine("Done");
      Console.WriteLine("\nFinal neural network model weights:\n");
      ShowVector(weights, 4, 10, true);

      double trainAcc = nn.Accuracy(trainData, weights);
      Console.WriteLine("\nAccuracy on training data = " + trainAcc.ToString("F4"));
      double testAcc = nn.Accuracy(testData, weights);
      Console.WriteLine("\nAccuracy on test data = " + testAcc.ToString("F4"));

      Console.WriteLine("\nEnd neural network with Resilient Back-Propagation demo\n");
      Console.ReadLine();
    } // Main
    static double[][] MakeAllData(int numInput, int numHidden, int numOutput, int numRows)
    {
      Random rnd = new Random();
      int numWeights = (numInput * numHidden) + numHidden +
        (numHidden * numOutput) + numOutput;
      double[] weights = new double[numWeights]; // actually weights & biases
      for (int i = 0; i < numWeights; ++i)
        weights[i] = 20.0 * rnd.NextDouble() - 10.0; // [-10.0, +10.0)

      Console.WriteLine("Generating weights:");
      ShowVector(weights, 4, 10, true);

      double[][] result = new double[numRows][]; // allocate return-result matrix
      for (int i = 0; i < numRows; ++i)
        result[i] = new double[numInput + numOutput]; // 1-of-N Y in last columns

      NeuralNetwork gnn =
        new NeuralNetwork(numInput, numHidden, numOutput); // generating NN
      gnn.SetWeights(weights);

      for (int r = 0; r < numRows; ++r) // for each row
      {
        // generate random inputs
        double[] inputs = new double[numInput];
        for (int i = 0; i < numInput; ++i)
          inputs[i] = 20.0 * rnd.NextDouble() - 10.0; // [-10.0, +10.0)

        // compute outputs
        double[] outputs = gnn.ComputeOutputs(inputs);

        // translate outputs to 1-of-N
        double[] oneOfN = new double[numOutput]; // all 0.0
        int maxIndex = 0;
        double maxValue = outputs[0];
        for (int i = 0; i < numOutput; ++i)
        {
          if (outputs[i] > maxValue)
          {
            maxIndex = i;
            maxValue = outputs[i];
          }
        }
        oneOfN[maxIndex] = 1.0;

        // place inputs and 1-of-N output values into curr row
        int c = 0; // column into result[][]
        for (int i = 0; i < numInput; ++i) // inputs
          result[r][c++] = inputs[i];
        for (int i = 0; i < numOutput; ++i) // outputs
          result[r][c++] = oneOfN[i];
      } // each row
      return result;
    } // MakeAllData
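    // Note: because the targets come from a randomly weighted "teacher" network with
    // the same 4-5-3 architecture as the network being trained, the generated data is
    // learnable in principle, which makes the reported accuracies meaningful.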
    static void MakeTrainTest(double[][] allData, double trainPct,
      out double[][] trainData, out double[][] testData)
    {
      Random rnd = new Random();
      int totRows = allData.Length;
      int numTrainRows = (int)(totRows * trainPct); // usually 0.80
      int numTestRows = totRows - numTrainRows;
      trainData = new double[numTrainRows][];
      testData = new double[numTestRows][];

      double[][] copy = new double[allData.Length][]; // ref copy of all data
      for (int i = 0; i < copy.Length; ++i)
        copy[i] = allData[i];

      for (int i = 0; i < copy.Length; ++i) // scramble order
      {
        int r = rnd.Next(i, copy.Length); // use Fisher-Yates
        double[] tmp = copy[r];
        copy[r] = copy[i];
        copy[i] = tmp;
      }
      for (int i = 0; i < numTrainRows; ++i)
        trainData[i] = copy[i];
      for (int i = 0; i < numTestRows; ++i)
        testData[i] = copy[i + numTrainRows];
    } // MakeTrainTest
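    // Note: the Fisher-Yates shuffle permutes only the row references in copy[];
    // the underlying rows are shared with allData, not duplicated, so the split
    // costs no extra row storage.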
    public static void ShowData(double[][] data, int numRows,
      int decimals, bool indices)
    {
      int len = data.Length.ToString().Length;
      for (int i = 0; i < numRows; ++i)
      {
        if (indices)
          Console.Write("[" + i.ToString().PadLeft(len) + "] ");
        for (int j = 0; j < data[i].Length; ++j)
        {
          double v = data[i][j];
          if (v >= 0.0)
            Console.Write(" "); // blank instead of a '+' sign
          Console.Write(v.ToString("F" + decimals) + " ");
        }
        Console.WriteLine("");
      }
      Console.WriteLine(". . .");
      int lastRow = data.Length - 1;
      if (indices)
        Console.Write("[" + lastRow.ToString().PadLeft(len) + "] ");
      for (int j = 0; j < data[lastRow].Length; ++j)
      {
        double v = data[lastRow][j];
        if (v >= 0.0)
          Console.Write(" "); // blank instead of a '+' sign
        Console.Write(v.ToString("F" + decimals) + " ");
      }
      Console.WriteLine("\n");
    }

    public static void ShowVector(double[] vector, int decimals,
      int lineLen, bool newLine)
    {
      for (int i = 0; i < vector.Length; ++i)
      {
        if (i > 0 && i % lineLen == 0) Console.WriteLine("");
        if (vector[i] >= 0) Console.Write(" ");
        Console.Write(vector[i].ToString("F" + decimals) + " ");
      }
      if (newLine)
        Console.WriteLine("");
    }
  } // Program

  public struct WeightComposite
  {
    public double[][] weights;
    public double[] biases;
  }

  public class NeuralNetwork
  {
    const double etaPlus = 1.2; // values are from the paper
    const double etaMinus = 0.5;
    const double deltaMax = 50.0;
    const double deltaMin = 1.0E-6;
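    // etaPlus grows a weight's individual step size after two gradients of the same
    // sign; etaMinus shrinks it after a sign change; deltaMax/deltaMin clamp every
    // step size into [1.0E-6, 50.0]. These are the values recommended in
    // Riedmiller & Braun (1993).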
    private double[][] values;
    private double[][] biases;
    private double[][][] weights;
    private Random rnd;
    const int layer_count = 3;
    int[] sizes;

    public NeuralNetwork(int numInput, int numHidden, int numOutput)
    {
      this.sizes = new int[NeuralNetwork.layer_count];
      this.sizes[0] = numInput;
      this.sizes[1] = numHidden;
      this.sizes[2] = numOutput;
      this.values = new double[NeuralNetwork.layer_count][];
      this.biases = new double[NeuralNetwork.layer_count][];
      this.weights = new double[NeuralNetwork.layer_count][][];
      for (int layer = 0; layer < NeuralNetwork.layer_count; layer++)
      {
        this.values[layer] = new double[this.sizes[layer]];
      }
      for (int layer = 1; layer < NeuralNetwork.layer_count; layer++)
      {
        this.biases[layer] = new double[this.sizes[layer]];
        this.weights[layer] = MakeMatrix(this.sizes[layer - 1], this.sizes[layer], 0.0);
      }
      this.rnd = new Random();
      this.InitializeWeights(); // all weights and biases
    } // ctor

    private static double[][] MakeMatrix(int rows, int cols, double v) // helper for ctor, Train
    {
      double[][] result = new double[rows][];
      for (int r = 0; r < result.Length; ++r)
        result[r] = new double[cols];
      for (int i = 0; i < rows; ++i)
        for (int j = 0; j < cols; ++j)
          result[i][j] = v;
      return result;
    }

    private static double[] MakeVector(int len, double v) // helper for Train
    {
      double[] result = new double[len];
      for (int i = 0; i < len; ++i)
        result[i] = v;
      return result;
    }

    private void InitializeWeights() // helper for ctor
    {
      // initialize weights and biases to random values between 0.0001 and 0.001
      int numWeights = (this.sizes[0] * this.sizes[1]) +
        (this.sizes[1] * this.sizes[NeuralNetwork.layer_count - 1]) +
        this.sizes[1] + this.sizes[NeuralNetwork.layer_count - 1];
      double[] initialWeights = new double[numWeights];
      for (int i = 0; i < initialWeights.Length; ++i)
        initialWeights[i] = (0.001 - 0.0001) * rnd.NextDouble() + 0.0001;
      this.SetWeights(initialWeights);
    }
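    // RPROP in brief (per the Riedmiller & Braun paper): every weight and bias has
    // its own step size delta, and only the SIGN of the accumulated batch gradient
    // is used:
    //   prevGrad * grad > 0  : same direction twice -> delta *= etaPlus (capped at
    //                          deltaMax), then step by -sign(grad) * delta
    //   prevGrad * grad < 0  : overshot a minimum -> delta *= etaMinus (floored at
    //                          deltaMin), undo the previous step, and zero the
    //                          stored gradient so the next epoch takes the branch
    //                          below
    //   prevGrad * grad == 0 : step by -sign(grad) * delta with delta unchanged
    // The gradient's magnitude never enters the update, which is what makes RPROP
    // insensitive to the scale problems of plain backprop.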
    public double[] TrainRPROP(double[][] trainData, int maxEpochs) // using RPROP
    {
      double[][] allGradTerms = new double[layer_count][];
      WeightComposite[] allGradsAcc = new WeightComposite[layer_count];
      WeightComposite[] allPrevGradsAcc = new WeightComposite[layer_count];
      WeightComposite[] allPrevDeltas = new WeightComposite[layer_count];
      for (int layer = 1; layer < layer_count; layer++)
      {
        int size = sizes[layer];
        int prev_size = sizes[layer - 1];
        allGradTerms[layer] = new double[size];
        allGradsAcc[layer].weights = MakeMatrix(prev_size, size, 0.0);
        allGradsAcc[layer].biases = new double[size];
        allPrevGradsAcc[layer].weights = MakeMatrix(prev_size, size, 0.0);
        allPrevGradsAcc[layer].biases = new double[size];
        allPrevDeltas[layer].weights = MakeMatrix(prev_size, size, 0.01);
        allPrevDeltas[layer].biases = MakeVector(size, 0.01);
      }

      int epoch = 0;
      while (epoch < maxEpochs)
      {
        ++epoch;
        if (epoch % 100 == 0 && epoch != maxEpochs)
        {
          double[] currWts = this.GetWeights();
          double err = MeanSquaredError(trainData, currWts);
          Console.WriteLine("epoch = " + epoch + " err = " + err.ToString("F4"));
        }

        // 1. compute and accumulate all gradients
        for (int layer = 1; layer < layer_count; layer++)
        {
          ZeroOut(allGradsAcc[layer].weights); // zero-out values from prev iteration
          ZeroOut(allGradsAcc[layer].biases);
        }

        for (int row = 0; row < trainData.Length; ++row) // walk thru all training data
        {
          double[] xValues = new double[this.sizes[0]]; // inputs
          double[] tValues = new double[this.sizes[2]]; // target values
          // no need to visit in random order because all rows are processed before any updates ('batch')
          Array.Copy(trainData[row], xValues, this.sizes[0]); // get the inputs
          Array.Copy(trainData[row], this.sizes[0], tValues, 0, this.sizes[2]); // get the target values
          ComputeOutputs(xValues); // copy xValues in, compute outputs using curr weights (and store outputs internally)

          // compute the h-o gradient term/component as in regular back-prop;
          // this term is usually a lower-case Greek delta, but there are too many other deltas below
          for (int i = 0; i < this.sizes[NeuralNetwork.layer_count - 1]; ++i)
          {
            double value = this.values[NeuralNetwork.layer_count - 1][i];
            double derivative = (1 - value) * value; // derivative of softmax (diagonal term) = (1 - y) * y, same as log-sigmoid
            allGradTerms[NeuralNetwork.layer_count - 1][i] = derivative * (value - tValues[i]); // careful with O-T vs. T-O; O-T is the most usual
          }

          // compute the i-h gradient term/component as in regular back-prop
          for (int layer = layer_count - 2; layer >= 1; layer--)
          {
            for (int i = 0; i < this.sizes[layer]; ++i)
            {
              double derivative = (1 - this.values[layer][i]) * (1 + this.values[layer][i]); // derivative of tanh = (1 - y) * (1 + y)
              double sum = 0.0;
              for (int j = 0; j < this.sizes[layer + 1]; ++j) // each hidden term is a sum over the next layer's terms
              {
                double x = allGradTerms[layer + 1][j] * this.weights[layer + 1][i][j];
                sum += x;
              }
              allGradTerms[layer][i] = derivative * sum;
            }
          }

          for (int layer = layer_count - 1; layer >= 1; layer--)
          {
            for (int j = 0; j < this.sizes[layer]; ++j)
            {
              // the bias gradients
              allGradsAcc[layer].biases[j] += allGradTerms[layer][j];
              // multiply in the upstream value to make the weight gradients, and accumulate
              for (int i = 0; i < this.sizes[layer - 1]; ++i)
              {
                double grad = allGradTerms[layer][j] * this.values[layer - 1][i];
                allGradsAcc[layer].weights[i][j] += grad;
              }
            }
          }
        } // for (int row = 0; row < trainData.Length; ++row)
        // end compute all gradients

        // 2. update all weights and biases (in any order)
        for (int layer = 1; layer < layer_count; layer++)
        {
          int size = sizes[layer];
          int previous_size = sizes[layer - 1];
          // update this layer's weights and biases
          for (int j = 0; j < size; ++j)
          {
            double delta, t;
            // Weights
            for (int i = 0; i < previous_size; ++i)
            {
              delta = allPrevDeltas[layer].weights[i][j];
              t = allPrevGradsAcc[layer].weights[i][j] * allGradsAcc[layer].weights[i][j];
              if (t > 0) // no sign change, increase delta
              {
                delta *= NeuralNetwork.etaPlus; // compute delta
                if (delta > deltaMax) delta = deltaMax; // keep it in range
                double tmp = -Math.Sign(allGradsAcc[layer].weights[i][j]) * delta; // determine direction and magnitude
                this.weights[layer][i][j] += tmp; // update weight
              }
              else if (t < 0) // grad changed sign, decrease delta
              {
                delta *= NeuralNetwork.etaMinus; // the delta (not used now, but saved for later)
                if (delta < deltaMin) delta = deltaMin; // keep it in range
                this.weights[layer][i][j] -= allPrevDeltas[layer].weights[i][j]; // revert to previous weight
                allGradsAcc[layer].weights[i][j] = 0; // forces the t == 0 branch next iteration
              }
              else // this happens the iteration after the 2nd branch above (just had a change in gradient)
              {
                double tmp = -Math.Sign(allGradsAcc[layer].weights[i][j]) * delta; // determine direction
                this.weights[layer][i][j] += tmp; // update
              }
              allPrevDeltas[layer].weights[i][j] = delta; // save delta
              allPrevGradsAcc[layer].weights[i][j] = allGradsAcc[layer].weights[i][j]; // save the (accumulated) gradient
            } // i

            // Biases
            delta = allPrevDeltas[layer].biases[j];
            t = allPrevGradsAcc[layer].biases[j] * allGradsAcc[layer].biases[j];
            if (t > 0) // no sign change, increase delta
            {
              delta *= NeuralNetwork.etaPlus; // compute delta
              if (delta > NeuralNetwork.deltaMax) delta = NeuralNetwork.deltaMax;
              double tmp = -Math.Sign(allGradsAcc[layer].biases[j]) * delta; // determine direction
              this.biases[layer][j] += tmp; // update
            }
            else if (t < 0) // grad changed sign, decrease delta
            {
              delta *= NeuralNetwork.etaMinus; // the delta (not used now, but saved for later)
              if (delta < NeuralNetwork.deltaMin) delta = NeuralNetwork.deltaMin;
              this.biases[layer][j] -= allPrevDeltas[layer].biases[j]; // revert to previous bias
              allGradsAcc[layer].biases[j] = 0; // forces the t == 0 branch next iteration
            }
            else // this happens the iteration after the 2nd branch above (just had a change in gradient)
            {
              if (delta > deltaMax) delta = deltaMax;
              else if (delta < NeuralNetwork.deltaMin) delta = NeuralNetwork.deltaMin;
              // no way should delta be 0 . . .
              double tmp = -Math.Sign(allGradsAcc[layer].biases[j]) * delta; // determine direction
              this.biases[layer][j] += tmp; // update
            }
            allPrevDeltas[layer].biases[j] = delta;
            allPrevGradsAcc[layer].biases[j] = allGradsAcc[layer].biases[j];
          } // j
        } // for (int layer = 1; layer < layer_count; layer++)
      } // while

      double[] wts = this.GetWeights();
      return wts;
    } // TrainRPROP

    private static void ZeroOut(double[][] matrix) // helper for Train
    {
      for (int i = 0; i < matrix.Length; ++i)
        for (int j = 0; j < matrix[i].Length; ++j)
          matrix[i][j] = 0.0;
    }

    private static void ZeroOut(double[] array) // helper for Train
    {
      for (int i = 0; i < array.Length; ++i)
        array[i] = 0.0;
    }

    public void SetWeights(double[] weights)
    {
      // copy weights and biases in weights[] array to i-h weights, i-h biases, h-o weights, h-o biases
      int numWeights = (this.sizes[0] * this.sizes[1]) +
        (this.sizes[1] * this.sizes[NeuralNetwork.layer_count - 1]) + this.sizes[1] +
        this.sizes[NeuralNetwork.layer_count - 1];
      if (weights.Length != numWeights)
        throw new Exception("Bad weights array in SetWeights");

      int k = 0; // points into weights param
      for (int layer = 1; layer < NeuralNetwork.layer_count; layer++)
      {
        for (int i = 0; i < this.sizes[layer - 1]; ++i)
          for (int j = 0; j < this.sizes[layer]; ++j)
            this.weights[layer][i][j] = weights[k++];
        for (int i = 0; i < this.sizes[layer]; ++i)
          this.biases[layer][i] = weights[k++];
      }
    }

    public double[] GetWeights()
    {
      int numWeights = (this.sizes[0] * this.sizes[1]) +
        (this.sizes[1] * this.sizes[NeuralNetwork.layer_count - 1]) + this.sizes[1] +
        this.sizes[NeuralNetwork.layer_count - 1];
      double[] result = new double[numWeights];
      int k = 0;
      for (int layer = 1; layer < NeuralNetwork.layer_count; layer++)
      {
        for (int i = 0; i < this.weights[layer].Length; ++i)
          for (int j = 0; j < this.weights[layer][0].Length; ++j)
            result[k++] = this.weights[layer][i][j];
        for (int i = 0; i < this.biases[layer].Length; ++i)
          result[k++] = this.biases[layer][i];
      }
      return result;
    }

    public double[] ComputeOutputs(double[] xValues)
    {
      Array.Copy(xValues, this.values[0], this.sizes[0]);
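      // Feed-forward, one layer at a time: for each layer L >= 1,
      //   sums[j] = biases[L][j] + Sum_i ( values[L-1][i] * weights[L][i][j] )
      // then values[L] = tanh(sums) for the hidden layer and Softmax(sums) for
      // the output layer.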
      for (int layer = 1; layer < NeuralNetwork.layer_count; layer++)
      {
        double[] sums = new double[this.sizes[layer]]; // scratch array for this layer's pre-activation sums
        Array.Copy(this.biases[layer], sums, this.sizes[layer]);
        for (int j = 0; j < this.sizes[layer]; ++j) // compute sum of weights * upstream values
          for (int i = 0; i < this.sizes[layer - 1]; ++i)
            sums[j] += this.values[layer - 1][i] * this.weights[layer][i][j]; // note +=
        if (layer < NeuralNetwork.layer_count - 1)
        {
          for (int i = 0; i < this.sizes[layer]; ++i) // apply activation
            this.values[layer][i] = HyperTan(sums[i]); // hard-coded
        }
        else
        {
          this.values[NeuralNetwork.layer_count - 1] = Softmax(sums);
        }
      }
      double[] retResult = new double[this.sizes[NeuralNetwork.layer_count - 1]]; // could define a GetOutputs method instead
      Array.Copy(this.values[NeuralNetwork.layer_count - 1], retResult, retResult.Length);
      return retResult;
    }

    private static double HyperTan(double x)
    {
      if (x < -20.0) return -1.0; // approximation is correct to 30 decimals
      else if (x > 20.0) return 1.0;
      else return Math.Tanh(x);
    }

    private static double[] Softmax(double[] oSums)
    {
      // does all output nodes at once so the scaling factor doesn't have to be re-computed each time
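      // subtracting the max before exponentiating is the standard overflow guard:
      // exp(oSums[i] - max) <= 1.0, and the final ratios are unchanged because the
      // common factor exp(-max) cancels in numerator and denominator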
      // determine max output-sum
      double max = oSums[0];
      for (int i = 0; i < oSums.Length; ++i)
        if (oSums[i] > max) max = oSums[i];
      // determine scaling factor -- sum of exp(each val - max)
      double scale = 0.0;
      for (int i = 0; i < oSums.Length; ++i)
        scale += Math.Exp(oSums[i] - max);
      double[] result = new double[oSums.Length];
      for (int i = 0; i < oSums.Length; ++i)
        result[i] = Math.Exp(oSums[i] - max) / scale;
      return result; // now scaled so that the xi sum to 1.0
    }

    public double Accuracy(double[][] testData, double[] weights)
    {
      this.SetWeights(weights);
      // percentage correct using winner-takes-all
      int numCorrect = 0;
      int numWrong = 0;
      double[] xValues = new double[this.sizes[0]]; // inputs
      double[] tValues = new double[this.sizes[NeuralNetwork.layer_count - 1]]; // targets
      double[] yValues; // computed Y
      for (int i = 0; i < testData.Length; ++i)
      {
        Array.Copy(testData[i], xValues, this.sizes[0]); // parse data into x-values and t-values
        Array.Copy(testData[i], this.sizes[0], tValues, 0, this.sizes[NeuralNetwork.layer_count - 1]);
        yValues = this.ComputeOutputs(xValues);
        int maxIndex = MaxIndex(yValues); // which cell in yValues has the largest value?
        if (tValues[maxIndex] == 1.0) // ugly. consider AreEqual(double x, double y, double epsilon)
          ++numCorrect;
        else
          ++numWrong;
      }
      return (numCorrect * 1.0) / (numCorrect + numWrong); // ugly 2 - check for divide by zero
    }
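    // A minimal sketch of the AreEqual helper that the comment in Accuracy suggests;
    // the method and its epsilon are illustrative additions, not part of the original
    // demo. The exact-equality test above happens to work here only because tValues
    // holds literal 0.0/1.0 values written by MakeAllData.
    private static bool AreEqual(double x, double y, double epsilon)
    {
      return Math.Abs(x - y) < epsilon; // e.g., epsilon = 1.0E-10
    }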
    public double MeanSquaredError(double[][] trainData, double[] weights)
    {
      this.SetWeights(weights); // copy the weights to evaluate in
      double[] xValues = new double[this.sizes[0]]; // inputs
      double[] tValues = new double[this.sizes[NeuralNetwork.layer_count - 1]]; // targets
      double sumSquaredError = 0.0;
      for (int i = 0; i < trainData.Length; ++i) // walk through each training data item
      {
        // following assumes data has all x-values first, followed by y-values!
        Array.Copy(trainData[i], xValues, this.sizes[0]); // extract inputs
        Array.Copy(trainData[i], this.sizes[0], tValues, 0, this.sizes[NeuralNetwork.layer_count - 1]); // extract targets
        double[] yValues = this.ComputeOutputs(xValues);
        for (int j = 0; j < yValues.Length; ++j)
          sumSquaredError += ((yValues[j] - tValues[j]) * (yValues[j] - tValues[j]));
      }
      return sumSquaredError / trainData.Length;
    }
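    // Note: MeanSquaredError is used only for the periodic progress report in
    // TrainRPROP; the RPROP update itself depends only on the sign of the
    // accumulated gradient, never on the error magnitude.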
    private static int MaxIndex(double[] vector) // helper for Accuracy()
    {
      // index of largest value
      int bigIndex = 0;
      double biggestVal = vector[0];
      for (int i = 0; i < vector.Length; ++i)
      {
        if (vector[i] > biggestVal)
        {
          biggestVal = vector[i];
          bigIndex = i;
        }
      }
      return bigIndex;
    }
  } // NeuralNetwork
} // namespace