Trainlover08

neural_network.cpp

Aug 19th, 2024 (edited)
#include <iostream>
#include <vector>
#include <cmath>
#include <random>
#include <cassert>
#include <algorithm>
#include <deque>
#include <tuple>

using namespace std;

class ReplayBuffer {
private:
    struct Transition {
        std::vector<double> state;
        int action;
        double reward;
        std::vector<double> nextState;
        bool done;
    };

    std::deque<Transition> buffer;
    size_t capacity;
    std::mt19937 rng;

public:
    ReplayBuffer(size_t capacity) : capacity(capacity) {
        std::random_device rd;
        rng = std::mt19937(rd());
    }

    void storeTransition(const std::vector<double>& state, int action, double reward, const std::vector<double>& nextState, bool done) {
        if (buffer.size() >= capacity) {
            buffer.pop_front();
        }
        buffer.push_back({state, action, reward, nextState, done});
    }

    std::tuple<std::vector<std::vector<double>>, std::vector<int>, std::vector<double>, std::vector<std::vector<double>>, std::vector<bool>>
    sampleBatch(size_t batchSize) {
        std::vector<std::vector<double>> states(batchSize);
        std::vector<int> actions(batchSize);
        std::vector<double> rewards(batchSize);
        std::vector<std::vector<double>> nextStates(batchSize);
        std::vector<bool> dones(batchSize);

        std::uniform_int_distribution<size_t> dist(0, buffer.size() - 1);

        for (size_t i = 0; i < batchSize; ++i) {
            size_t idx = dist(rng);
            const Transition& transition = buffer[idx];

            states[i] = transition.state;
            actions[i] = transition.action;
            rewards[i] = transition.reward;
            nextStates[i] = transition.nextState;
            dones[i] = transition.done;
        }

        return {states, actions, rewards, nextStates, dones};
    }

    bool isReady(size_t batchSize) const {
        return buffer.size() >= batchSize;
    }
};

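// Usage sketch for ReplayBuffer: the 4-dimensional states, capacity of 10000 and
// batch size of 64 below are placeholder values, not requirements of the class.
// Sampling is uniform with replacement, so sampleBatch() should only be called
// once isReady() reports enough stored transitions.
inline void replayBufferExample() {
    ReplayBuffer replay(10000);
    for (int i = 0; i < 128; ++i) {
        replay.storeTransition({0.1, 0.2, 0.3, 0.4}, /*action=*/0, /*reward=*/1.0,
                               {0.2, 0.3, 0.4, 0.5}, /*done=*/false);
    }
    if (replay.isReady(64)) {
        auto batch = replay.sampleBatch(64);
        const auto& states = std::get<0>(batch);
        std::cout << "sampled " << states.size() << " transitions\n";
    }
}
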
class AdamOptimizer {
public:
    double lr;
    double beta1;
    double beta2;
    double epsilon;
    int t;

    AdamOptimizer(double learning_rate, double beta1, double beta2, double epsilon)
        : lr(learning_rate), beta1(beta1), beta2(beta2), epsilon(epsilon), t(0) {}

    void update(vector<vector<double>>& weights, vector<vector<double>>& m, vector<vector<double>>& v, const vector<vector<double>>& grads) {
        t++;
        for (size_t i = 0; i < weights.size(); ++i) {
            for (size_t j = 0; j < weights[0].size(); ++j) {
                m[i][j] = beta1 * m[i][j] + (1 - beta1) * grads[i][j];
                v[i][j] = beta2 * v[i][j] + (1 - beta2) * grads[i][j] * grads[i][j];
                double m_hat = m[i][j] / (1 - pow(beta1, t));
                double v_hat = v[i][j] / (1 - pow(beta2, t));
                weights[i][j] -= lr * m_hat / (sqrt(v_hat) + epsilon);
            }
        }
    }

    void update(vector<double>& biases, vector<double>& m, vector<double>& v, const vector<double>& grads) {
        t++;
        for (size_t i = 0; i < biases.size(); ++i) {
            m[i] = beta1 * m[i] + (1 - beta1) * grads[i];
            v[i] = beta2 * v[i] + (1 - beta2) * grads[i] * grads[i];
            double m_hat = m[i] / (1 - pow(beta1, t));
            double v_hat = v[i] / (1 - pow(beta2, t));
            biases[i] -= lr * m_hat / (sqrt(v_hat) + epsilon);
        }
    }
};

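// Worked example for AdamOptimizer on a single 1x1 weight: with lr = 0.01 and a
// gradient of 0.5, the bias-corrected first step has magnitude close to lr
// (m_hat = 0.5, v_hat = 0.25, so the update is 0.01 * 0.5 / 0.5 = 0.01). The
// hyperparameters are typical defaults, not values prescribed by this file.
inline void adamExample() {
    AdamOptimizer adam(0.01, 0.9, 0.999, 1e-8);
    vector<vector<double>> w{{1.0}}, m{{0.0}}, v{{0.0}}, g{{0.5}};
    adam.update(w, m, v, g);
    std::cout << "weight after one Adam step: " << w[0][0] << "\n"; // ~0.99
}
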
class AdamWOptimizer {
public:
    double lr;          // Learning rate
    double beta1;       // Exponential decay rate for the first moment estimates
    double beta2;       // Exponential decay rate for the second moment estimates
    double epsilon;     // Small constant to prevent division by zero
    double weightDecay; // Weight decay coefficient (decoupled L2 regularization)
    int t;              // Time step

    AdamWOptimizer(double learning_rate, double beta1, double beta2, double epsilon, double weightDecay)
        : lr(learning_rate), beta1(beta1), beta2(beta2), epsilon(epsilon), weightDecay(weightDecay), t(0) {}

    void update(vector<vector<double>>& weights, vector<vector<double>>& m, vector<vector<double>>& v, const vector<vector<double>>& grads) {
        t++;
        for (size_t i = 0; i < weights.size(); ++i) {
            for (size_t j = 0; j < weights[0].size(); ++j) {
                // Update biased first moment estimate
                m[i][j] = beta1 * m[i][j] + (1 - beta1) * grads[i][j];

                // Update biased second raw moment estimate
                v[i][j] = beta2 * v[i][j] + (1 - beta2) * grads[i][j] * grads[i][j];

                // Compute bias-corrected first moment estimate
                double m_hat = m[i][j] / (1 - pow(beta1, t));

                // Compute bias-corrected second raw moment estimate
                double v_hat = v[i][j] / (1 - pow(beta2, t));

                // Apply weight decay
                weights[i][j] -= lr * weightDecay * weights[i][j];

                // Update weights with AdamW rule
                weights[i][j] -= lr * m_hat / (sqrt(v_hat) + epsilon);
            }
        }
    }

    void update(vector<double>& biases, vector<double>& m, vector<double>& v, const vector<double>& grads) {
        t++;
        for (size_t i = 0; i < biases.size(); ++i) {
            // Update biased first moment estimate
            m[i] = beta1 * m[i] + (1 - beta1) * grads[i];

            // Update biased second raw moment estimate
            v[i] = beta2 * v[i] + (1 - beta2) * grads[i] * grads[i];

            // Compute bias-corrected first moment estimate
            double m_hat = m[i] / (1 - pow(beta1, t));

            // Compute bias-corrected second raw moment estimate
            double v_hat = v[i] / (1 - pow(beta2, t));

            // Apply weight decay (biases typically don't have weight decay, but added for completeness)
            biases[i] -= lr * weightDecay * biases[i];

            // Update biases with AdamW rule
            biases[i] -= lr * m_hat / (sqrt(v_hat) + epsilon);
        }
    }
};

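// Worked example for AdamWOptimizer: unlike Adam above, the weight decay is
// decoupled, i.e. applied directly to the parameter (w -= lr * weightDecay * w)
// before the Adam step rather than folded into the gradient. Hyperparameters
// below are placeholder values.
inline void adamWExample() {
    AdamWOptimizer adamw(0.01, 0.9, 0.999, 1e-8, 0.01);
    vector<vector<double>> w{{1.0}}, m{{0.0}}, v{{0.0}}, g{{0.5}};
    adamw.update(w, m, v, g);
    // Decay shrinks w by 0.01 * 0.01 * 1.0 = 0.0001, then the Adam step subtracts ~0.01.
    std::cout << "weight after one AdamW step: " << w[0][0] << "\n"; // ~0.9899
}
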
class Layer {
public:
    vector<vector<double>> weights;
    vector<double> biases;
    vector<vector<double>> grads_weights;
    vector<double> grads_biases;
    vector<vector<double>> m_weights;
    vector<vector<double>> v_weights;
    vector<double> m_biases;
    vector<double> v_biases;
    vector<vector<double>> cache_inputs;
    vector<vector<double>> cache_z;
    string activation;
    AdamWOptimizer optimizer;

    Layer(int input_dim, int output_dim, const string& activation, AdamWOptimizer optimizer)
        : optimizer(optimizer) {
        random_device rd;
        mt19937 gen(rd());
        normal_distribution<> d(0, 0.01);                          // small Gaussian init for non-ReLU layers
        normal_distribution<> dist(0, std::sqrt(2.0 / input_dim)); // He initialization for ReLU layers

        this->activation = activation;
        weights.resize(input_dim, vector<double>(output_dim));
        grads_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        m_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        v_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        biases.resize(output_dim, 0.0);
        grads_biases.resize(output_dim, 0.0);
        m_biases.resize(output_dim, 0.0);
        v_biases.resize(output_dim, 0.0);

        for (int i = 0; i < input_dim; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                if (activation != "relu") {
                    weights[i][j] = d(gen);
                } else {
                    weights[i][j] = dist(gen);
                }
            }
        }
    }

    vector<vector<double>> forward(const vector<vector<double>>& inputs) {
        int batch_size = inputs.size();
        int output_dim = weights[0].size();

        vector<vector<double>> z(batch_size, vector<double>(output_dim));
        vector<vector<double>> a(batch_size, vector<double>(output_dim));

        for (int i = 0; i < batch_size; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                for (size_t k = 0; k < inputs[0].size(); ++k) {
                    z[i][j] += inputs[i][k] * weights[k][j];
                }
                z[i][j] += biases[j];
                a[i][j] = (activation == "relu") ? max(0.0, z[i][j]) : z[i][j];
            }
        }

        cache_inputs = inputs;
        cache_z = z;
        return a;
    }

    vector<vector<double>> backward(const vector<vector<double>>& grad_output, double& grad_clip) {
        int batch_size = cache_inputs.size();
        int input_dim = cache_inputs[0].size();
        int output_dim = weights[0].size();

        vector<vector<double>> grad_inputs(batch_size, vector<double>(input_dim));

        for (int i = 0; i < batch_size; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                double grad_z = (activation == "relu") ? ((cache_z[i][j] > 0) ? grad_output[i][j] : 0.0) : grad_output[i][j];

                // Clip the gradient to the range [-grad_clip, grad_clip]
                if (grad_z > grad_clip) {
                    grad_z = grad_clip;
                } else if (grad_z < -grad_clip) {
                    grad_z = -grad_clip;
                }

                grads_biases[j] += grad_z;
                for (int k = 0; k < input_dim; ++k) {
                    grads_weights[k][j] += cache_inputs[i][k] * grad_z;
                    grad_inputs[i][k] += weights[k][j] * grad_z;
                }
            }
        }

        return grad_inputs;
    }

    void update_weights() {
        optimizer.update(weights, m_weights, v_weights, grads_weights);
        optimizer.update(biases, m_biases, v_biases, grads_biases);
    }

    void reset_gradients() {
        for (auto& row : grads_weights) {
            fill(row.begin(), row.end(), 0.0);
        }
        fill(grads_biases.begin(), grads_biases.end(), 0.0);
    }

private:
    double He_initialization(double input_size) {
        // He initialization standard deviation (helper; the constructor currently
        // draws He-initialized weights from `dist` directly)
        double stddev = std::sqrt(2.0 / input_size);

        // Random number generator with normal distribution
        std::random_device rd;
        std::mt19937 gen(rd());
        std::normal_distribution<double> dist(0.0, stddev);

        return dist(gen);
    }
};

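// Usage sketch for Layer: a single 3 -> 2 ReLU layer. forward() takes a
// batch-major matrix (batch_size x input_dim) and returns batch_size x output_dim;
// backward() takes the gradient of the loss with respect to the output and returns
// the gradient with respect to the input, accumulating weight and bias gradients
// inside the layer until update_weights() / reset_gradients() are called.
// The input values and hyperparameters below are placeholders.
inline void layerExample() {
    AdamWOptimizer opt(0.001, 0.9, 0.999, 1e-8, 0.01);
    Layer layer(3, 2, "relu", opt);

    vector<vector<double>> batch{{0.5, -0.2, 0.1}, {1.0, 0.3, -0.4}};     // 2 x 3
    vector<vector<double>> out = layer.forward(batch);                    // 2 x 2

    vector<vector<double>> grad_out(2, vector<double>(2, 1.0));           // dLoss/dOut, 2 x 2
    double grad_clip = 1.0;
    vector<vector<double>> grad_in = layer.backward(grad_out, grad_clip); // 2 x 3

    layer.update_weights();
    layer.reset_gradients();
    std::cout << "out: " << out.size() << " x " << out[0].size()
              << ", grad_in: " << grad_in.size() << " x " << grad_in[0].size() << "\n";
}
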
class NeuralNetwork {
public:
    vector<Layer> layers;

    void add_layer(const Layer& layer) {
        layers.push_back(layer);
    }

    vector<vector<double>> forward(const vector<vector<double>>& inputs) {
        vector<vector<double>> out = inputs;
        for (auto& layer : layers) {
            out = layer.forward(out);
        }
        return out;
    }

    void backward(const vector<vector<double>>& grad_output, double& grad_clip) {
        vector<vector<double>> grad = grad_output;
        for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
            grad = it->backward(grad, grad_clip);
        }
    }

    void update_weights() {
        for (auto& layer : layers) {
            layer.update_weights();
            layer.reset_gradients();
        }
    }
};

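// Usage sketch for NeuralNetwork: a tiny 4 -> 8 -> 2 network trained for one
// step on a single placeholder sample. The loss gradient fed to backward() is
// the derivative of mean squared error, 2 * (prediction - target) / batch_size;
// the input, target and hyperparameter values are placeholders.
inline void networkExample() {
    AdamWOptimizer opt(0.001, 0.9, 0.999, 1e-8, 0.01);

    NeuralNetwork net;
    net.add_layer(Layer(4, 8, "relu", opt));
    net.add_layer(Layer(8, 2, "linear", opt));

    vector<vector<double>> inputs{{0.1, 0.2, 0.3, 0.4}};   // batch of 1
    vector<vector<double>> targets{{1.0, 0.0}};

    vector<vector<double>> preds = net.forward(inputs);

    // Gradient of MSE with respect to each prediction
    vector<vector<double>> grad(preds.size(), vector<double>(preds[0].size()));
    for (size_t i = 0; i < preds.size(); ++i) {
        for (size_t j = 0; j < preds[i].size(); ++j) {
            grad[i][j] = 2.0 * (preds[i][j] - targets[i][j]) / preds.size();
        }
    }

    double grad_clip = 1.0;
    net.backward(grad, grad_clip);
    net.update_weights();
}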