GithubNeuralNetwork - 04012024_Beta
YourMain12 | Jan 4th, 2024 | C#
using System;
using System.Linq;

public enum ActivationFunction
{
    ReLU,
    Sigmoid,
    Tanh,
    LeakyReLU
}

public enum Regularizer
{
    None,
    L1,
    L2
}
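
// NOTE: This paste calls into Matrix and MatrixFunctions helpers that are not
// included. The sketch below is a guess at the element-wise activation helpers
// the call sites assume (e.g. outputs.Map(MatrixFunctions.ReLU)); it is not the
// original library. The Matrix type itself (Zeros, Ones, RandomMatrix, Map,
// MapDerivative, PointwiseMultiply, Transpose, operator overloads) must still
// be supplied separately.
public static class MatrixFunctions
{
    public static double ReLU(double x) => Math.Max(0.0, x);

    public static double LeakyReLU(double x) => x > 0.0 ? x : 0.01 * x;

    public static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));

    public static double Tanh(double x) => Math.Tanh(x);

    // Mean, Variance, Sqrt and Sign over whole matrices are also referenced
    // below, but they depend on the missing Matrix type and are omitted here.
}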

public class GithubNeuralNetwork
{
    private int[] _layers;
    private Matrix[] _weights;
    private Matrix[] _biases;
    // Element-wise activation delegates, populated in SetActivationFunctions
    // so Backpropagation never dereferences a null entry.
    private Func<double, double>[] _activationFunctions;
    private double _learningRate;
    private double _epsilon;
    private Matrix[] _gamma;
    private Matrix[] _beta;
    private double _initialLearningRate;
    private double _decayRate;
    private string _optimizer;
    private Matrix[] _movingMeans;
    private Matrix[] _movingVariances;
    private Matrix[] _mWeights;
    private Matrix[] _vWeights;
    private Matrix[] _mBiases;
    private Matrix[] _vBiases;
    private Matrix[] _mGamma;
    private Matrix[] _vGamma;
    private Matrix[] _mBeta;
    private Matrix[] _vBeta;
    private int _t;
    private double _dropoutRate;
    private Matrix[] _dropoutMasks;
    private ActivationFunction[] _activationOptions;
    private Regularizer _regularizer;
    private double _lambda;

    public GithubNeuralNetwork(double learningRate, double epsilon, string optimizer, double decayRate, double dropoutRate, Regularizer regularizer, double lambda, params int[] layers)
    {
        _layers = layers;
        _weights = new Matrix[layers.Length - 1];
        _biases = new Matrix[layers.Length - 1];
        _activationFunctions = new Func<double, double>[layers.Length - 1];
        _learningRate = learningRate;
        _epsilon = epsilon;
        _gamma = new Matrix[layers.Length - 1];
        _beta = new Matrix[layers.Length - 1];
        _initialLearningRate = learningRate;
        _decayRate = decayRate;
        _optimizer = optimizer;
        _movingMeans = new Matrix[layers.Length - 1];
        _movingVariances = new Matrix[layers.Length - 1];
        _mWeights = new Matrix[layers.Length - 1];
        _vWeights = new Matrix[layers.Length - 1];
        _mBiases = new Matrix[layers.Length - 1];
        _vBiases = new Matrix[layers.Length - 1];
        _mGamma = new Matrix[layers.Length - 1];
        _vGamma = new Matrix[layers.Length - 1];
        _mBeta = new Matrix[layers.Length - 1];
        _vBeta = new Matrix[layers.Length - 1];
        _t = 1;
        _dropoutRate = dropoutRate;
        _dropoutMasks = new Matrix[layers.Length - 1];
        _activationOptions = new ActivationFunction[layers.Length - 1];
        _regularizer = regularizer;
        _lambda = lambda;

        InitializeWeightsAndBiases();
        SetActivationFunctions();
    }

    private void InitializeWeightsAndBiases()
    {
        Random rand = new Random();
        for (int i = 0; i < _weights.Length; i++)
        {
            _weights[i] = XavierInitialization(_layers[i + 1], _layers[i], rand);
            _biases[i] = Matrix.Zeros(_layers[i + 1], 1);
            _gamma[i] = Matrix.Ones(_layers[i + 1], 1);
            _beta[i] = Matrix.Zeros(_layers[i + 1], 1);

            _movingMeans[i] = Matrix.Zeros(_layers[i + 1], 1);
            _movingVariances[i] = Matrix.Ones(_layers[i + 1], 1);

            _mWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _vWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _mBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _vBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _mGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _vGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _mBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
            _vBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
        }
    }

    private void SetActivationFunctions()
    {
        Random rand = new Random();
        for (int i = 0; i < _activationOptions.Length; i++)
        {
            // Randomly choose an activation function for each layer.
            _activationOptions[i] = (ActivationFunction)rand.Next(4);

            // Keep the delegate array in sync with the chosen option so that
            // Backpropagation has a non-null activation to differentiate.
            switch (_activationOptions[i])
            {
                case ActivationFunction.Sigmoid: _activationFunctions[i] = MatrixFunctions.Sigmoid; break;
                case ActivationFunction.Tanh: _activationFunctions[i] = MatrixFunctions.Tanh; break;
                case ActivationFunction.LeakyReLU: _activationFunctions[i] = MatrixFunctions.LeakyReLU; break;
                default: _activationFunctions[i] = MatrixFunctions.ReLU; break;
            }
        }
    }

    private Matrix XavierInitialization(int rows, int cols, Random rand)
    {
        double scale = Math.Sqrt(2.0 / (rows + cols));
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    // Note: defined here but never invoked from FeedForward in this paste.
    private Matrix LayerNormalization(Matrix x, Matrix gamma, Matrix beta, int layerIndex)
    {
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);

        _movingMeans[layerIndex] = (_movingMeans[layerIndex] * 0.9) + (mean * 0.1);
        _movingVariances[layerIndex] = (_movingVariances[layerIndex] * 0.9) + (variance * 0.1);

        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        return (gamma * normalized) + beta;
    }

    private Matrix FeedForward(Matrix input, bool training)
    {
        Matrix outputs = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            if (training && _dropoutRate > 0.0)
            {
                // Inverted dropout: zero out units with probability
                // _dropoutRate, then rescale so the expected activation
                // matches inference time.
                _dropoutMasks[i] = Matrix.RandomMatrix(outputs.RowCount, outputs.ColumnCount);
                _dropoutMasks[i] = _dropoutMasks[i].Map(x => x < _dropoutRate ? 0 : 1);
                outputs = outputs.PointwiseMultiply(_dropoutMasks[i]);
                outputs *= 1.0 / (1.0 - _dropoutRate);
            }

            // Weights are (fan-out x fan-in) and activations are column
            // vectors, so the weight matrix multiplies from the left.
            outputs = _weights[i] * outputs + _biases[i];

            switch (_activationOptions[i])
            {
                case ActivationFunction.ReLU:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
                case ActivationFunction.Sigmoid:
                    outputs = outputs.Map(MatrixFunctions.Sigmoid);
                    break;
                case ActivationFunction.Tanh:
                    outputs = outputs.Map(MatrixFunctions.Tanh);
                    break;
                case ActivationFunction.LeakyReLU:
                    outputs = outputs.Map(MatrixFunctions.LeakyReLU);
                    break;
                default:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
            }
        }
        return outputs;
    }

    private void Backpropagation(Matrix input, Matrix target)
    {
        Matrix[] outputs = new Matrix[_weights.Length + 1];
        outputs[0] = input;

        // Forward pass, caching each layer's activation for the backward pass.
        for (int i = 0; i < _weights.Length; i++)
        {
            outputs[i + 1] = _weights[i] * outputs[i] + _biases[i];
            outputs[i + 1] = outputs[i + 1].Map(_activationFunctions[i]);
        }

        Matrix[] errors = new Matrix[_weights.Length];
        errors[_weights.Length - 1] = outputs[^1] - target;

        // Propagate the error backwards; MapDerivative is assumed to apply the
        // element-wise derivative of the given activation.
        for (int i = _weights.Length - 2; i >= 0; i--)
        {
            errors[i] = (_weights[i + 1].Transpose() * errors[i + 1]).MapDerivative(_activationFunctions[i]);
        }

        Matrix[] gradientsWeights = new Matrix[_weights.Length];
        Matrix[] gradientsBiases = new Matrix[_weights.Length];
        Matrix[] gradientsGamma = new Matrix[_weights.Length];
        Matrix[] gradientsBeta = new Matrix[_weights.Length];

        for (int i = 0; i < _weights.Length; i++)
        {
            gradientsWeights[i] = errors[i] * outputs[i].Transpose();
            gradientsBiases[i] = errors[i];
            // Gamma/beta gradients combine same-shaped column vectors, so this
            // must be an element-wise product rather than a matrix product.
            gradientsGamma[i] = errors[i].PointwiseMultiply(_movingMeans[i]);
            gradientsBeta[i] = errors[i].PointwiseMultiply(_movingVariances[i]);
        }

        Optimizer(gradientsWeights, gradientsBiases, gradientsGamma, gradientsBeta);

        // Regularization: weight decay applied directly to the weights.
        if (_regularizer != Regularizer.None)
        {
            for (int i = 0; i < _weights.Length; i++)
            {
                if (_regularizer == Regularizer.L1)
                {
                    _weights[i] -= (_lambda * MatrixFunctions.Sign(_weights[i]));
                }
                else if (_regularizer == Regularizer.L2)
                {
                    _weights[i] -= (_lambda * _weights[i]);
                }
            }
        }
    }

    public void Train(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                // Iterate over the actual batch length so a short final batch
                // does not index out of range.
                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true); // refreshes dropout masks
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }

            LearningRateScheduler(epoch);
        }
    }

    public Matrix Predict(Matrix input)
    {
        return FeedForward(input, false);
    }

    // Inverse-time decay: lr_epoch = lr_0 / (1 + decayRate * epoch).
    private void LearningRateScheduler(int epoch)
    {
        _learningRate = _initialLearningRate / (1 + _decayRate * epoch);
    }
    private void Optimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases, Matrix[] gradientsGamma, Matrix[] gradientsBeta)
    {
        double beta1 = 0.9;    // Adam first-moment (momentum) decay
        double beta2 = 0.999;  // Adam second-moment (RMSProp) decay
        double epsilon = 1e-8; // Small constant to avoid division by zero

        for (int i = 0; i < _weights.Length; i++)
        {
            if (_optimizer == "Adam")
            {
                // Second moments accumulate element-wise squared gradients;
                // matrix division below is assumed to be element-wise.
                _mWeights[i] = (beta1 * _mWeights[i]) + ((1 - beta1) * gradientsWeights[i]);
                _vWeights[i] = (beta2 * _vWeights[i]) + ((1 - beta2) * gradientsWeights[i].PointwiseMultiply(gradientsWeights[i]));

                _mBiases[i] = (beta1 * _mBiases[i]) + ((1 - beta1) * gradientsBiases[i]);
                _vBiases[i] = (beta2 * _vBiases[i]) + ((1 - beta2) * gradientsBiases[i].PointwiseMultiply(gradientsBiases[i]));

                _mGamma[i] = (beta1 * _mGamma[i]) + ((1 - beta1) * gradientsGamma[i]);
                _vGamma[i] = (beta2 * _vGamma[i]) + ((1 - beta2) * gradientsGamma[i].PointwiseMultiply(gradientsGamma[i]));

                _mBeta[i] = (beta1 * _mBeta[i]) + ((1 - beta1) * gradientsBeta[i]);
                _vBeta[i] = (beta2 * _vBeta[i]) + ((1 - beta2) * gradientsBeta[i].PointwiseMultiply(gradientsBeta[i]));

                // Bias-corrected moment estimates.
                Matrix mHatWeights = _mWeights[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatWeights = _vWeights[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBiases = _mBiases[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBiases = _vBiases[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatGamma = _mGamma[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatGamma = _vGamma[i] / (1 - Math.Pow(beta2, _t));

                Matrix mHatBeta = _mBeta[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBeta = _vBeta[i] / (1 - Math.Pow(beta2, _t));

                _weights[i] -= (_learningRate * mHatWeights) / (MatrixFunctions.Sqrt(vHatWeights) + epsilon);
                _biases[i] -= (_learningRate * mHatBiases) / (MatrixFunctions.Sqrt(vHatBiases) + epsilon);
                _gamma[i] -= (_learningRate * mHatGamma) / (MatrixFunctions.Sqrt(vHatGamma) + epsilon);
                _beta[i] -= (_learningRate * mHatBeta) / (MatrixFunctions.Sqrt(vHatBeta) + epsilon);
            }
            else if (_optimizer == "AdaGrad")
            {
                _vWeights[i] += gradientsWeights[i].PointwiseMultiply(gradientsWeights[i]);
                _vBiases[i] += gradientsBiases[i].PointwiseMultiply(gradientsBiases[i]);
                _vGamma[i] += gradientsGamma[i].PointwiseMultiply(gradientsGamma[i]);
                _vBeta[i] += gradientsBeta[i].PointwiseMultiply(gradientsBeta[i]);

                // Per-parameter learning rates scale the gradients element-wise.
                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)).PointwiseMultiply(gradientsWeights[i]);
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)).PointwiseMultiply(gradientsBiases[i]);
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)).PointwiseMultiply(gradientsGamma[i]);
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)).PointwiseMultiply(gradientsBeta[i]);
            }
            else
            {
                // Plain SGD fallback.
                _weights[i] -= _learningRate * gradientsWeights[i];
                _biases[i] -= _learningRate * gradientsBiases[i];
                _gamma[i] -= _learningRate * gradientsGamma[i];
                _beta[i] -= _learningRate * gradientsBeta[i];
            }
        }

        // Advance the Adam timestep once per optimization step, not once per
        // layer, so bias correction stays consistent across layers.
        if (_optimizer == "Adam")
        {
            _t++;
        }
    }
}
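
// Example usage: a minimal sketch, assuming the missing Matrix type exposes
// some way to build column vectors. Matrix.FromArray below is hypothetical --
// substitute whatever factory the real Matrix class provides.
public static class GithubNeuralNetworkDemo
{
    public static void Main()
    {
        // A 2-4-1 network trained with Adam, inverse-time lr decay,
        // 20% dropout and L2 weight decay.
        var net = new GithubNeuralNetwork(
            learningRate: 0.001,
            epsilon: 1e-5,
            optimizer: "Adam",
            decayRate: 0.01,
            dropoutRate: 0.2,
            regularizer: Regularizer.L2,
            lambda: 1e-4,
            2, 4, 1);

        Matrix[] inputs  = { Matrix.FromArray(new double[] { 0.0, 1.0 }) }; // hypothetical factory
        Matrix[] targets = { Matrix.FromArray(new double[] { 1.0 }) };

        net.Train(inputs, targets, epochs: 100, batchSize: 1);
        Matrix prediction = net.Predict(inputs[0]);
        Console.WriteLine(prediction);
    }
}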