YourMain12

GithubNeuralNetworkDevBuild - 04012024-1

Jan 4th, 2024 (edited)

using System;
using System.Linq;

public enum ActivationFunction
{
    ReLU,
    Sigmoid,
    Tanh,
    LeakyReLU,
    Swish,
    Mish,
    GELU
}
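
For reference, the matrix-valued Swish, Mish, and GELU methods further down implement these standard scalar definitions elementwise. The helper class and names here are illustrative additions, not part of the paste:

public static class ScalarActivations
{
    public static double Sigmoid(double x) => 1.0 / (1.0 + Math.Exp(-x));
    public static double Swish(double x) => x * Sigmoid(x);                           // x * sigmoid(x)
    public static double Mish(double x) => x * Math.Tanh(Math.Log(1 + Math.Exp(x))); // x * tanh(softplus(x))
    public static double GELU(double x) =>                                            // tanh approximation
        0.5 * x * (1 + Math.Tanh(Math.Sqrt(2.0 / Math.PI) * (x + 0.044715 * x * x * x)));
}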

public enum Regularizer
{
    None,
    L1,
    L2
}
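
The paste does not include the Matrix class or the MatrixFunctions helpers it calls. Below is a minimal sketch of the API the code appears to assume, with every member inferred from usage; these are hypothetical stubs, not the author's implementation. Note the paste uses Matrix * Matrix both for layer products and for elementwise gradient squares, so the real type presumably disambiguates in a way the stubs cannot capture:

// Hypothetical stubs for the Matrix API assumed by GithubNeuralNetwork.
// Only the members referenced below are listed; bodies are placeholders.
public class Matrix
{
    public int RowCount { get; }
    public int ColumnCount { get; }

    public static Matrix Zeros(int rows, int cols) => throw new NotImplementedException();
    public static Matrix Ones(int rows, int cols) => throw new NotImplementedException();
    public static Matrix RandomMatrix(int rows, int cols, Random rand = null) => throw new NotImplementedException();

    public Matrix Copy() => throw new NotImplementedException();
    public Matrix Transpose() => throw new NotImplementedException();
    public Matrix Map(Func<double, double> f) => throw new NotImplementedException();        // elementwise
    public Matrix Map(Func<Matrix, Matrix> f) => f(this);                                    // whole-matrix overload
    public Matrix MapDerivative(Func<Matrix, Matrix> f) => throw new NotImplementedException();
    public Matrix PointwiseMultiply(Matrix other) => throw new NotImplementedException();

    // Operators the code relies on (not declared here): Matrix*Matrix,
    // Matrix+Matrix, Matrix-Matrix, Matrix+double, double*Matrix, Matrix*double,
    // double/Matrix, Matrix/Matrix and Matrix/double (elementwise), -=, +=.
}

public static class MatrixFunctions
{
    public static Matrix ReLU(Matrix x) => throw new NotImplementedException();
    public static Matrix LeakyReLU(Matrix x) => throw new NotImplementedException();
    public static Matrix Sigmoid(Matrix x) => throw new NotImplementedException();
    public static Matrix Tanh(Matrix x) => throw new NotImplementedException();
    public static Matrix Softplus(Matrix x) => throw new NotImplementedException();
    public static Matrix Sqrt(Matrix x) => throw new NotImplementedException();
    public static Matrix Sign(Matrix x) => throw new NotImplementedException();
    public static Matrix Mean(Matrix x, int axis) => throw new NotImplementedException();
    public static Matrix Variance(Matrix x, int axis) => throw new NotImplementedException();
}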

public class GithubNeuralNetwork
{
    private int[] _layers;
    private Matrix[] _weights;
    private Matrix[] _biases;
    private Func<Matrix, Matrix>[] _activationFunctions;
    private double _learningRate;
    private double _epsilon;
    private Matrix[] _gamma;
    private Matrix[] _beta;
    private double _initialLearningRate;
    private double _decayRate;
    private string _optimizer;
    private Matrix[] _movingMeans;
    private Matrix[] _movingVariances;
    private Matrix[] _mWeights;
    private Matrix[] _vWeights;
    private Matrix[] _mBiases;
    private Matrix[] _vBiases;
    private Matrix[] _mGamma;
    private Matrix[] _vGamma;
    private Matrix[] _mBeta;
    private Matrix[] _vBeta;
    private Matrix[] _slowWeights;
    private Matrix[] _slowBiases;
    private double _lookaheadAlpha;
    private double _lookaheadBeta;
    private int _t;
    private double _dropoutRate;
    private Matrix[] _dropoutMasks;
    private ActivationFunction[] _activationOptions;
    private Regularizer _regularizer;
    private double _lambda;

    // `params` must be the last parameter in C#, so the layer sizes follow the
    // optional Lookahead hyperparameters.
    public GithubNeuralNetwork(double learningRate, double epsilon, string optimizer, double decayRate, double dropoutRate, Regularizer regularizer, double lambda, double lookaheadAlpha = 0.5, double lookaheadBeta = 0.9, params int[] layers)
    {
        _layers = layers;
        _weights = new Matrix[layers.Length - 1];
        _biases = new Matrix[layers.Length - 1];
        _activationFunctions = new Func<Matrix, Matrix>[layers.Length - 1];
        _learningRate = learningRate;
        _epsilon = epsilon;
        _gamma = new Matrix[layers.Length - 1];
        _beta = new Matrix[layers.Length - 1];
        _initialLearningRate = learningRate;
        _decayRate = decayRate;
        _optimizer = optimizer;
        _movingMeans = new Matrix[layers.Length - 1];
        _movingVariances = new Matrix[layers.Length - 1];
        _mWeights = new Matrix[layers.Length - 1];
        _vWeights = new Matrix[layers.Length - 1];
        _mBiases = new Matrix[layers.Length - 1];
        _vBiases = new Matrix[layers.Length - 1];
        _mGamma = new Matrix[layers.Length - 1];
        _vGamma = new Matrix[layers.Length - 1];
        _mBeta = new Matrix[layers.Length - 1];
        _vBeta = new Matrix[layers.Length - 1];
        _slowWeights = new Matrix[_weights.Length];
        _slowBiases = new Matrix[_biases.Length];
        _lookaheadAlpha = lookaheadAlpha;
        _lookaheadBeta = lookaheadBeta;
        _t = 1;
        _dropoutRate = dropoutRate;
        _dropoutMasks = new Matrix[layers.Length - 1];
        _activationOptions = new ActivationFunction[layers.Length - 1];
        _regularizer = regularizer;
        _lambda = lambda;

        InitializeWeightsAndBiases();
        SetActivationFunctions();
        InitializeSlowWeightsAndBiases();
    }

    private void InitializeSlowWeightsAndBiases()
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            _slowWeights[i] = _weights[i].Copy();
            _slowBiases[i] = _biases[i].Copy();
        }
    }

    // Two-layer residual block. Both passes reuse this layer's weight matrix,
    // so the shortcut only applies when the output shape matches the input.
    // Activations are column vectors, hence W * x + b.
    private Matrix ResidualBlock(Matrix input, int layerIndex)
    {
        Matrix residual = input;
        Matrix outputs = input;

        int numLayersInBlock = 2;

        for (int i = 0; i < numLayersInBlock; i++)
        {
            Matrix layerOutput = _weights[layerIndex] * outputs + _biases[layerIndex];
            layerOutput = layerOutput.Map(_activationFunctions[layerIndex]);

            outputs = layerOutput;
        }

        if (outputs.RowCount == residual.RowCount && outputs.ColumnCount == residual.ColumnCount)
        {
            outputs += residual; // Add the shortcut (residual) to the output
        }

        return outputs;
    }

    private void LookaheadOptimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases)
    {
        for (int i = 0; i < _weights.Length; i++)
        {
            // Slow weights track an exponential average of the fast weights.
            _slowWeights[i] = (_lookaheadAlpha * _slowWeights[i]) + ((1 - _lookaheadAlpha) * _weights[i]);
            _slowBiases[i] = (_lookaheadAlpha * _slowBiases[i]) + ((1 - _lookaheadAlpha) * _biases[i]);

            // Fast update blends the raw gradient with a pull toward the slow weights.
            _weights[i] -= _learningRate * (_lookaheadBeta * gradientsWeights[i] + (1 - _lookaheadBeta) * _slowWeights[i]);
            _biases[i] -= _learningRate * (_lookaheadBeta * gradientsBiases[i] + (1 - _lookaheadBeta) * _slowBiases[i]);
        }
    }
    private void InitializeWeightsAndBiases()
    {
        Random rand = new Random();
        for (int i = 0; i < _weights.Length; i++)
        {
            // Weights are (fanOut x fanIn); activations flow as column vectors.
            _weights[i] = XavierInitialization(_layers[i + 1], _layers[i], rand);
            _biases[i] = Matrix.Zeros(_layers[i + 1], 1);
            _gamma[i] = Matrix.Ones(_layers[i + 1], 1);
            _beta[i] = Matrix.Zeros(_layers[i + 1], 1);

            _movingMeans[i] = Matrix.Zeros(_layers[i + 1], 1);
            _movingVariances[i] = Matrix.Ones(_layers[i + 1], 1);

            _mWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _vWeights[i] = Matrix.Zeros(_weights[i].RowCount, _weights[i].ColumnCount);
            _mBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _vBiases[i] = Matrix.Zeros(_biases[i].RowCount, _biases[i].ColumnCount);
            _mGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _vGamma[i] = Matrix.Zeros(_gamma[i].RowCount, _gamma[i].ColumnCount);
            _mBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
            _vBeta[i] = Matrix.Zeros(_beta[i].RowCount, _beta[i].ColumnCount);
        }
    }

    private Matrix Swish(Matrix x)
    {
        return x * MatrixFunctions.Sigmoid(x); // assumes elementwise * for same-shaped matrices
    }

    private Matrix Mish(Matrix x)
    {
        return x * MatrixFunctions.Tanh(MatrixFunctions.Softplus(x));
    }

    private Matrix GELU(Matrix x)
    {
        // Tanh approximation of GELU. Math.Pow cannot take a Matrix, so the
        // cubic term is applied per element via Map.
        Matrix cubed = x.Map(v => Math.Pow(v, 3));
        return 0.5 * x * (1 + MatrixFunctions.Tanh(Math.Sqrt(2.0 / Math.PI) * (x + 0.044715 * cubed)));
    }

    private void SetActivationFunctions()
    {
        Random rand = new Random();
        for (int i = 0; i < _activationOptions.Length; i++)
        {
            // Randomly choose one of the 7 activation functions for this layer.
            _activationOptions[i] = (ActivationFunction)rand.Next(7);

            // Keep the Func<Matrix, Matrix> table in sync so Backpropagation and
            // ResidualBlock see the same per-layer choice.
            _activationFunctions[i] = _activationOptions[i] switch
            {
                ActivationFunction.Sigmoid => (Func<Matrix, Matrix>)MatrixFunctions.Sigmoid,
                ActivationFunction.Tanh => MatrixFunctions.Tanh,
                ActivationFunction.LeakyReLU => MatrixFunctions.LeakyReLU,
                ActivationFunction.Swish => Swish,
                ActivationFunction.Mish => Mish,
                ActivationFunction.GELU => GELU,
                _ => MatrixFunctions.ReLU,
            };
        }
    }

    private Matrix XavierInitialization(int rows, int cols, Random rand)
    {
        // Glorot scaling: variance proportional to 2 / (fanIn + fanOut).
        double scale = Math.Sqrt(2.0 / (rows + cols));
        return Matrix.RandomMatrix(rows, cols, rand) * scale;
    }

    private Matrix LayerNormalization(Matrix x, Matrix gamma, Matrix beta, int layerIndex)
    {
        Matrix mean = MatrixFunctions.Mean(x, axis: 1);
        Matrix variance = MatrixFunctions.Variance(x, axis: 1);

        // Track running statistics with momentum 0.9 for use at inference time.
        _movingMeans[layerIndex] = (_movingMeans[layerIndex] * 0.9) + (mean * 0.1);
        _movingVariances[layerIndex] = (_movingVariances[layerIndex] * 0.9) + (variance * 0.1);

        Matrix normalized = (x - mean) / MatrixFunctions.Sqrt(variance + _epsilon);
        return (gamma * normalized) + beta;
    }

    private Matrix FeedForward(Matrix input, bool training)
    {
        Matrix outputs = input; // column vector of activations

        for (int i = 0; i < _weights.Length; i++)
        {
            if (training && _dropoutRate > 0.0)
            {
                // Inverted dropout: zero out units, then rescale the survivors.
                _dropoutMasks[i] = Matrix.RandomMatrix(outputs.RowCount, outputs.ColumnCount);
                _dropoutMasks[i] = _dropoutMasks[i].Map(x => x < _dropoutRate ? 0 : 1);
                outputs = outputs.PointwiseMultiply(_dropoutMasks[i]);
                outputs *= 1.0 / (1.0 - _dropoutRate);
            }

            outputs = _weights[i] * outputs + _biases[i];

            switch (_activationOptions[i])
            {
                case ActivationFunction.ReLU:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
                case ActivationFunction.Sigmoid:
                    outputs = outputs.Map(MatrixFunctions.Sigmoid);
                    break;
                case ActivationFunction.Tanh:
                    outputs = outputs.Map(MatrixFunctions.Tanh);
                    break;
                case ActivationFunction.LeakyReLU:
                    outputs = outputs.Map(MatrixFunctions.LeakyReLU);
                    break;
                case ActivationFunction.Swish:
                    outputs = Swish(outputs);
                    break;
                case ActivationFunction.Mish:
                    outputs = Mish(outputs);
                    break;
                case ActivationFunction.GELU:
                    outputs = GELU(outputs);
                    break;
                default:
                    outputs = outputs.Map(MatrixFunctions.ReLU);
                    break;
            }
        }
        return outputs;
    }
    private void Backpropagation(Matrix input, Matrix target)
    {
        // Forward pass, caching each layer's activation (column vectors).
        Matrix[] outputs = new Matrix[_weights.Length + 1];
        outputs[0] = input;

        for (int i = 0; i < _weights.Length; i++)
        {
            outputs[i + 1] = _weights[i] * outputs[i] + _biases[i];
            outputs[i + 1] = outputs[i + 1].Map(_activationFunctions[i]);
        }

        Matrix[] errors = new Matrix[_weights.Length];
        errors[_weights.Length - 1] = outputs[^1] - target;

        // Propagate the error backwards through the transposed weights.
        for (int i = _weights.Length - 2; i >= 0; i--)
        {
            errors[i] = (_weights[i + 1].Transpose() * errors[i + 1]).MapDerivative(_activationFunctions[i]);
        }

        Matrix[] gradientsWeights = new Matrix[_weights.Length];
        Matrix[] gradientsBiases = new Matrix[_weights.Length];
        Matrix[] gradientsGamma = new Matrix[_weights.Length];
        Matrix[] gradientsBeta = new Matrix[_weights.Length];

        for (int i = 0; i < _weights.Length; i++)
        {
            // (fanOut x 1) times (1 x fanIn) outer product matches the weight shape.
            gradientsWeights[i] = errors[i] * outputs[i].Transpose();
            gradientsBiases[i] = errors[i];
            gradientsGamma[i] = errors[i] * _movingMeans[i];
            gradientsBeta[i] = errors[i] * _movingVariances[i];
        }

        Optimizer(gradientsWeights, gradientsBiases, gradientsGamma, gradientsBeta);

        // Regularization: weight decay applied after the optimizer step.
        if (_regularizer != Regularizer.None)
        {
            for (int i = 0; i < _weights.Length; i++)
            {
                if (_regularizer == Regularizer.L1)
                {
                    _weights[i] -= (_lambda * MatrixFunctions.Sign(_weights[i]));
                }
                else if (_regularizer == Regularizer.L2)
                {
                    _weights[i] -= (_lambda * _weights[i]);
                }
            }
        }
    }

    public void Train(Matrix[] inputs, Matrix[] targets, int epochs, int batchSize)
    {
        for (int epoch = 0; epoch < epochs; epoch++)
        {
            for (int i = 0; i < inputs.Length; i += batchSize)
            {
                Matrix[] batchInputs = inputs.Skip(i).Take(batchSize).ToArray();
                Matrix[] batchTargets = targets.Skip(i).Take(batchSize).ToArray();

                // Iterate over the actual batch length; the last batch may be short.
                for (int j = 0; j < batchInputs.Length; j++)
                {
                    FeedForward(batchInputs[j], true);
                    Backpropagation(batchInputs[j], batchTargets[j]);
                }
            }

            LearningRateScheduler(epoch);
        }
    }

    public Matrix Predict(Matrix input)
    {
        return FeedForward(input, false);
    }

    private void LearningRateScheduler(int epoch)
    {
        // Inverse time decay: e.g. decayRate 0.01 halves the rate by epoch 100.
        _learningRate = _initialLearningRate / (1 + _decayRate * epoch);
    }
    private void Optimizer(Matrix[] gradientsWeights, Matrix[] gradientsBiases, Matrix[] gradientsGamma, Matrix[] gradientsBeta)
    {
        double final_lr = 0.1;         // Final learning rate AdaBound's bounds converge to
        double beta1 = 0.9;            // Adam's hyperparameter (momentum decay)
        double beta2 = 0.999;          // Adam's hyperparameter (RMSprop decay)
        double epsilon = 1e-8;         // Small constant to prevent division by zero
        double schedule_decay = 0.004;

        _t++; // One shared timestep per optimizer call

        if (_optimizer == "Lookahead")
        {
            // LookaheadOptimizer loops over every layer itself.
            LookaheadOptimizer(gradientsWeights, gradientsBiases);
            return;
        }

        for (int i = 0; i < _weights.Length; i++)
        {
            if (_optimizer == "AdaBound")
            {
                // Step-size bounds shrink toward final_lr as _t grows; each
                // parameter's effective step is clipped into [lower, upper]
                // before the bias-corrected momentum is applied.
                double lower_bound = final_lr * (1.0 - 1.0 / (_t + 1));
                double upper_bound = final_lr * (1.0 + 1.0 / (_t + 1));
                double schedule = schedule_decay * (1 - Math.Pow(0.999, _t)) / (1 - Math.Pow(0.9, _t));

                Matrix BoundedStep(Matrix vHat) =>
                    (_learningRate * schedule / (MatrixFunctions.Sqrt(vHat) + epsilon))
                        .Map(v => Math.Min(Math.Max(v, lower_bound), upper_bound));

                _mWeights[i] = (beta1 * _mWeights[i]) + ((1 - beta1) * gradientsWeights[i]);
                _vWeights[i] = (beta2 * _vWeights[i]) + ((1 - beta2) * (gradientsWeights[i] * gradientsWeights[i]));
                _mBiases[i] = (beta1 * _mBiases[i]) + ((1 - beta1) * gradientsBiases[i]);
                _vBiases[i] = (beta2 * _vBiases[i]) + ((1 - beta2) * (gradientsBiases[i] * gradientsBiases[i]));
                _mGamma[i] = (beta1 * _mGamma[i]) + ((1 - beta1) * gradientsGamma[i]);
                _vGamma[i] = (beta2 * _vGamma[i]) + ((1 - beta2) * (gradientsGamma[i] * gradientsGamma[i]));
                _mBeta[i] = (beta1 * _mBeta[i]) + ((1 - beta1) * gradientsBeta[i]);
                _vBeta[i] = (beta2 * _vBeta[i]) + ((1 - beta2) * (gradientsBeta[i] * gradientsBeta[i]));

                _weights[i] -= BoundedStep(_vWeights[i] / (1 - Math.Pow(beta2, _t))).PointwiseMultiply(_mWeights[i] / (1 - Math.Pow(beta1, _t)));
                _biases[i] -= BoundedStep(_vBiases[i] / (1 - Math.Pow(beta2, _t))).PointwiseMultiply(_mBiases[i] / (1 - Math.Pow(beta1, _t)));
                _gamma[i] -= BoundedStep(_vGamma[i] / (1 - Math.Pow(beta2, _t))).PointwiseMultiply(_mGamma[i] / (1 - Math.Pow(beta1, _t)));
                _beta[i] -= BoundedStep(_vBeta[i] / (1 - Math.Pow(beta2, _t))).PointwiseMultiply(_mBeta[i] / (1 - Math.Pow(beta1, _t)));
            }
            else if (_optimizer == "Adam")
            {
                _mWeights[i] = (beta1 * _mWeights[i]) + ((1 - beta1) * gradientsWeights[i]);
                _vWeights[i] = (beta2 * _vWeights[i]) + ((1 - beta2) * (gradientsWeights[i] * gradientsWeights[i]));

                _mBiases[i] = (beta1 * _mBiases[i]) + ((1 - beta1) * gradientsBiases[i]);
                _vBiases[i] = (beta2 * _vBiases[i]) + ((1 - beta2) * (gradientsBiases[i] * gradientsBiases[i]));

                _mGamma[i] = (beta1 * _mGamma[i]) + ((1 - beta1) * gradientsGamma[i]);
                _vGamma[i] = (beta2 * _vGamma[i]) + ((1 - beta2) * (gradientsGamma[i] * gradientsGamma[i]));

                _mBeta[i] = (beta1 * _mBeta[i]) + ((1 - beta1) * gradientsBeta[i]);
                _vBeta[i] = (beta2 * _vBeta[i]) + ((1 - beta2) * (gradientsBeta[i] * gradientsBeta[i]));

                // Bias-corrected first and second moments.
                Matrix mHatWeights = _mWeights[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatWeights = _vWeights[i] / (1 - Math.Pow(beta2, _t));
                Matrix mHatBiases = _mBiases[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBiases = _vBiases[i] / (1 - Math.Pow(beta2, _t));
                Matrix mHatGamma = _mGamma[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatGamma = _vGamma[i] / (1 - Math.Pow(beta2, _t));
                Matrix mHatBeta = _mBeta[i] / (1 - Math.Pow(beta1, _t));
                Matrix vHatBeta = _vBeta[i] / (1 - Math.Pow(beta2, _t));

                _weights[i] -= (_learningRate * mHatWeights) / (MatrixFunctions.Sqrt(vHatWeights) + epsilon);
                _biases[i] -= (_learningRate * mHatBiases) / (MatrixFunctions.Sqrt(vHatBiases) + epsilon);
                _gamma[i] -= (_learningRate * mHatGamma) / (MatrixFunctions.Sqrt(vHatGamma) + epsilon);
                _beta[i] -= (_learningRate * mHatBeta) / (MatrixFunctions.Sqrt(vHatBeta) + epsilon);
            }
            else if (_optimizer == "AdaGrad")
            {
                _vWeights[i] += gradientsWeights[i] * gradientsWeights[i];
                _vBiases[i] += gradientsBiases[i] * gradientsBiases[i];
                _vGamma[i] += gradientsGamma[i] * gradientsGamma[i];
                _vBeta[i] += gradientsBeta[i] * gradientsBeta[i];

                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)) * gradientsWeights[i];
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)) * gradientsBiases[i];
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)) * gradientsGamma[i];
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)) * gradientsBeta[i];
            }
            else if (_optimizer == "RMSProp")
            {
                _vWeights[i] = (beta1 * _vWeights[i]) + ((1 - beta1) * (gradientsWeights[i] * gradientsWeights[i]));
                _vBiases[i] = (beta1 * _vBiases[i]) + ((1 - beta1) * (gradientsBiases[i] * gradientsBiases[i]));
                _vGamma[i] = (beta1 * _vGamma[i]) + ((1 - beta1) * (gradientsGamma[i] * gradientsGamma[i]));
                _vBeta[i] = (beta1 * _vBeta[i]) + ((1 - beta1) * (gradientsBeta[i] * gradientsBeta[i]));

                _weights[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vWeights[i]) + epsilon)) * gradientsWeights[i];
                _biases[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBiases[i]) + epsilon)) * gradientsBiases[i];
                _gamma[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vGamma[i]) + epsilon)) * gradientsGamma[i];
                _beta[i] -= (_learningRate / (MatrixFunctions.Sqrt(_vBeta[i]) + epsilon)) * gradientsBeta[i];
            }
            else
            {
                // Plain SGD fallback.
                _weights[i] -= _learningRate * gradientsWeights[i];
                _biases[i] -= _learningRate * gradientsBiases[i];
                _gamma[i] -= _learningRate * gradientsGamma[i];
                _beta[i] -= _learningRate * gradientsBeta[i];
            }
        }
    }
}
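
Finally, a minimal usage sketch, assuming the Matrix stubs above are backed by a real implementation and reusing the usings at the top of the paste. The layer sizes, hyperparameters, and random training data are illustrative only, not from the original:

public static class Demo
{
    public static void Main()
    {
        // 2 inputs -> 4 hidden -> 1 output, Adam updates, light L2 decay, 10% dropout.
        var net = new GithubNeuralNetwork(
            learningRate: 0.001, epsilon: 1e-5, optimizer: "Adam",
            decayRate: 0.01, dropoutRate: 0.1,
            regularizer: Regularizer.L2, lambda: 1e-4,
            layers: new[] { 2, 4, 1 });

        // Samples are column vectors so that `W * x + b` conforms.
        var rand = new Random(0);
        Matrix[] inputs = Enumerable.Range(0, 8).Select(_ => Matrix.RandomMatrix(2, 1, rand)).ToArray();
        Matrix[] targets = Enumerable.Range(0, 8).Select(_ => Matrix.RandomMatrix(1, 1, rand)).ToArray();

        net.Train(inputs, targets, epochs: 50, batchSize: 4);
        Matrix prediction = net.Predict(inputs[0]);
    }
}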