Trainlover08

include/ai_folder/ai_versions/ai_v0.2/neural_network.cpp

Oct 29th, 2024
// (include/ai_folder/ai_versions/ai_v0.2/neural_network.cpp)


#include <iostream>
#include <vector>
#include <cmath>
#include <random>
#include <cassert>
#include <algorithm>
#include <deque>
#include <tuple>
#include <fstream>
#include <stdexcept>

using namespace std;

class ReplayBuffer {
private:
    struct Transition {
        std::vector<double> state;
        int action;
        double reward;
        std::vector<double> nextState;
        bool done;
    };

    std::deque<Transition> buffer;
    size_t capacity;
    std::mt19937 rng;

public:
    ReplayBuffer(size_t capacity) : capacity(capacity) {
        std::random_device rd;
        rng = std::mt19937(rd());
    }

    void storeTransition(const std::vector<double>& state, int action, double reward, const std::vector<double>& nextState, bool done) {
        if (buffer.size() >= capacity) {
            buffer.pop_front();
        }
        buffer.push_back({state, action, reward, nextState, done});
    }

    std::tuple<std::vector<std::vector<double>>, std::vector<int>, std::vector<double>, std::vector<std::vector<double>>, std::vector<bool>>
    sampleBatch(size_t batchSize) {
        // Sampling from an empty buffer would make the distribution below undefined.
        assert(!buffer.empty());

        std::vector<std::vector<double>> states(batchSize);
        std::vector<int> actions(batchSize);
        std::vector<double> rewards(batchSize);
        std::vector<std::vector<double>> nextStates(batchSize);
        std::vector<bool> dones(batchSize);

        std::uniform_int_distribution<size_t> dist(0, buffer.size() - 1);

        for (size_t i = 0; i < batchSize; ++i) {
            size_t idx = dist(rng);
            const Transition& transition = buffer[idx];

            states[i] = transition.state;
            actions[i] = transition.action;
            rewards[i] = transition.reward;
            nextStates[i] = transition.nextState;
            dones[i] = transition.done;
        }

        return {states, actions, rewards, nextStates, dones};
    }

    bool isReady(size_t batchSize) const {
        return buffer.size() >= batchSize;
    }
};
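
// Example usage sketch: driving the replay buffer above from a hypothetical training loop.
// The capacity, state values, actions, rewards, and batch size here are illustrative placeholders.
void replayBufferExample() {
    ReplayBuffer replay(1000);
    std::vector<double> state     = {0.1, 0.2, 0.3, 0.4};
    std::vector<double> nextState = {0.2, 0.3, 0.4, 0.5};
    replay.storeTransition(state, /*action=*/1, /*reward=*/0.5, nextState, /*done=*/false);
    replay.storeTransition(nextState, /*action=*/0, /*reward=*/1.0, state, /*done=*/true);

    if (replay.isReady(2)) {
        std::vector<std::vector<double>> states, nextStates;
        std::vector<int> actions;
        std::vector<double> rewards;
        std::vector<bool> dones;
        std::tie(states, actions, rewards, nextStates, dones) = replay.sampleBatch(2);
        // ... form TD targets from rewards / nextStates / dones and train the network ...
    }
}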

class AdamOptimizer {
public:
    double lr;
    double beta1;
    double beta2;
    double epsilon;
    int t;

    AdamOptimizer(double learning_rate, double beta1, double beta2, double epsilon)
        : lr(learning_rate), beta1(beta1), beta2(beta2), epsilon(epsilon), t(0) {}

    void update(vector<vector<double>>& weights, vector<vector<double>>& m, vector<vector<double>>& v, const vector<vector<double>>& grads) {
        t++;
        for (size_t i = 0; i < weights.size(); ++i) {
            for (size_t j = 0; j < weights[0].size(); ++j) {
                m[i][j] = beta1 * m[i][j] + (1 - beta1) * grads[i][j];
                v[i][j] = beta2 * v[i][j] + (1 - beta2) * grads[i][j] * grads[i][j];
                double m_hat = m[i][j] / (1 - pow(beta1, t));
                double v_hat = v[i][j] / (1 - pow(beta2, t));
                weights[i][j] -= lr * m_hat / (sqrt(v_hat) + epsilon);
            }
        }
    }

    void update(vector<double>& biases, vector<double>& m, vector<double>& v, const vector<double>& grads) {
        t++;
        for (size_t i = 0; i < biases.size(); ++i) {
            m[i] = beta1 * m[i] + (1 - beta1) * grads[i];
            v[i] = beta2 * v[i] + (1 - beta2) * grads[i] * grads[i];
            double m_hat = m[i] / (1 - pow(beta1, t));
            double v_hat = v[i] / (1 - pow(beta2, t));
            biases[i] -= lr * m_hat / (sqrt(v_hat) + epsilon);
        }
    }
};

class AdamWOptimizer {
public:
    double lr;          // Learning rate
    double beta1;       // Exponential decay rate for the first moment estimates
    double beta2;       // Exponential decay rate for the second moment estimates
    double epsilon;     // Small constant to prevent division by zero
    double weightDecay; // Weight decay coefficient (L2 regularization)
    int t;              // Time step

    AdamWOptimizer(double learning_rate, double beta1, double beta2, double epsilon, double weightDecay)
        : lr(learning_rate), beta1(beta1), beta2(beta2), epsilon(epsilon), weightDecay(weightDecay), t(0) {}

    void update(vector<vector<double>>& weights, vector<vector<double>>& m, vector<vector<double>>& v, const vector<vector<double>>& grads) {
        t++;
        for (size_t i = 0; i < weights.size(); ++i) {
            for (size_t j = 0; j < weights[0].size(); ++j) {
                // Update biased first moment estimate
                m[i][j] = beta1 * m[i][j] + (1 - beta1) * grads[i][j];

                // Update biased second raw moment estimate
                v[i][j] = beta2 * v[i][j] + (1 - beta2) * grads[i][j] * grads[i][j];

                // Compute bias-corrected first moment estimate
                double m_hat = m[i][j] / (1 - pow(beta1, t));

                // Compute bias-corrected second raw moment estimate
                double v_hat = v[i][j] / (1 - pow(beta2, t));

                // Apply weight decay
                weights[i][j] -= lr * weightDecay * weights[i][j];

                // Update weights with AdamW rule
                weights[i][j] -= lr * m_hat / (sqrt(v_hat) + epsilon);
            }
        }
    }

    void update(vector<double>& biases, vector<double>& m, vector<double>& v, const vector<double>& grads) {
        t++;
        for (size_t i = 0; i < biases.size(); ++i) {
            // Update biased first moment estimate
            m[i] = beta1 * m[i] + (1 - beta1) * grads[i];

            // Update biased second raw moment estimate
            v[i] = beta2 * v[i] + (1 - beta2) * grads[i] * grads[i];

            // Compute bias-corrected first moment estimate
            double m_hat = m[i] / (1 - pow(beta1, t));

            // Compute bias-corrected second raw moment estimate
            double v_hat = v[i] / (1 - pow(beta2, t));

            // Apply weight decay (biases typically don't have weight decay, but added for completeness)
            biases[i] -= lr * weightDecay * biases[i];

            // Update biases with AdamW rule
            biases[i] -= lr * m_hat / (sqrt(v_hat) + epsilon);
        }
    }

    // Serialize optimizer state to a file
    void save(ofstream& outFile) const {
        outFile << lr << " " << beta1 << " " << beta2 << " " << epsilon << " " << weightDecay << " " << t << "\n";
    }

    // Deserialize optimizer state from a file
    void load(ifstream& inFile) {
        inFile >> lr >> beta1 >> beta2 >> epsilon >> weightDecay >> t;
    }
};
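
// Example usage sketch: a single decoupled-weight-decay update on a tiny 1x2 weight matrix.
// The hyperparameters, weights, and gradient values are illustrative placeholders.
void adamWExample() {
    AdamWOptimizer opt(0.001, 0.9, 0.999, 1e-8, 0.01);
    std::vector<std::vector<double>> w = {{0.5, -0.3}};   // weights
    std::vector<std::vector<double>> m = {{0.0, 0.0}};    // first-moment state
    std::vector<std::vector<double>> v = {{0.0, 0.0}};    // second-moment state
    std::vector<std::vector<double>> g = {{0.2, -0.1}};   // gradients from backprop
    opt.update(w, m, v, g);                               // applies weight decay, then the Adam step
}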


class Layer {
public:
    vector<vector<double>> weights;
    vector<double> biases;
    vector<vector<double>> grads_weights;
    vector<double> grads_biases;
    vector<vector<double>> m_weights;
    vector<vector<double>> v_weights;
    vector<double> m_biases;
    vector<double> v_biases;
    vector<vector<double>> cache_inputs;
    vector<vector<double>> cache_z;
    string activation;
    AdamWOptimizer optimizer;

    Layer(int input_dim, int output_dim, const string& activation, AdamWOptimizer optimizer)
        : optimizer(optimizer) {
        random_device rd;
        mt19937 gen(rd());
        normal_distribution<> d(0, 0.01);                            // small random init for non-ReLU layers
        normal_distribution<> dist(0, std::sqrt(2.0 / input_dim));   // He initialization for ReLU layers

        this->activation = activation;
        weights.resize(input_dim, vector<double>(output_dim));
        grads_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        m_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        v_weights.resize(input_dim, vector<double>(output_dim, 0.0));
        biases.resize(output_dim, 0.0);
        grads_biases.resize(output_dim, 0.0);
        m_biases.resize(output_dim, 0.0);
        v_biases.resize(output_dim, 0.0);

        for (int i = 0; i < input_dim; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                if (activation != "relu") {
                    weights[i][j] = d(gen);
                } else {
                    weights[i][j] = dist(gen);
                }
            }
        }
    }

    vector<vector<double>> forward(const vector<vector<double>>& inputs) {
        int batch_size = inputs.size();
        int output_dim = weights[0].size();

        vector<vector<double>> z(batch_size, vector<double>(output_dim));
        vector<vector<double>> a(batch_size, vector<double>(output_dim));

        for (int i = 0; i < batch_size; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                for (size_t k = 0; k < inputs[0].size(); ++k) {
                    z[i][j] += inputs[i][k] * weights[k][j];
                }
                z[i][j] += biases[j];
                a[i][j] = (activation == "relu") ? max(0.0, z[i][j]) : z[i][j];
            }
        }

        cache_inputs = inputs;
        cache_z = z;
        return a;
    }

    vector<vector<double>> backward(const vector<vector<double>>& grad_output, double& grad_clip) {
        int batch_size = cache_inputs.size();
        int input_dim = cache_inputs[0].size();
        int output_dim = weights[0].size();

        vector<vector<double>> grad_inputs(batch_size, vector<double>(input_dim));

        for (int i = 0; i < batch_size; ++i) {
            for (int j = 0; j < output_dim; ++j) {
                // ReLU derivative: pass the gradient only where the pre-activation was positive
                double grad_z = (activation == "relu") ? ((cache_z[i][j] > 0) ? grad_output[i][j] : 0.0) : grad_output[i][j];

                // Clip the gradient to the range [-grad_clip, grad_clip]
                if (grad_z < -grad_clip) {
                    grad_z = -grad_clip;
                } else if (grad_z > grad_clip) {
                    grad_z = grad_clip;
                }

                grads_biases[j] += grad_z;
                for (int k = 0; k < input_dim; ++k) {
                    grads_weights[k][j] += cache_inputs[i][k] * grad_z;
                    grad_inputs[i][k] += weights[k][j] * grad_z;
                }
            }
        }

        return grad_inputs;
    }

    void update_weights() {
        optimizer.update(weights, m_weights, v_weights, grads_weights);
        optimizer.update(biases, m_biases, v_biases, grads_biases);
    }

    void reset_gradients() {
        for (auto& row : grads_weights) {
            fill(row.begin(), row.end(), 0.0);
        }
        fill(grads_biases.begin(), grads_biases.end(), 0.0);
    }

    // Serialize the layer's parameters to a file
    void save(ofstream& outFile) const {
        // Save weights
        for (const auto& row : weights) {
            for (double val : row) {
                outFile << val << " ";
            }
        }
        outFile << "\n";

        // Save biases
        for (double val : biases) {
            outFile << val << " ";
        }
        outFile << "\n";

        // Save optimizer states
        optimizer.save(outFile);
    }

    // Deserialize the layer's parameters from a file
    void load(ifstream& inFile) {
        // Load weights
        for (auto& row : weights) {
            for (double& val : row) {
                inFile >> val;
            }
        }

        // Load biases
        for (double& val : biases) {
            inFile >> val;
        }

        // Load optimizer states
        optimizer.load(inFile);
    }

private:
    // Helper that draws a single He-initialized weight (currently unused by the constructor above)
    double He_initialization(double input_size) {
        // He initialization standard deviation
        double stddev = std::sqrt(2.0 / input_size);

        // Random number generator with normal distribution
        std::random_device rd;
        std::mt19937 gen(rd());
        std::normal_distribution<double> dist(0.0, stddev);

        return dist(gen);
    }
};
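
// Example usage sketch: one forward/backward/update cycle on a single layer.
// The 3-in / 2-out shape, inputs, and upstream gradients are illustrative placeholders.
void layerExample() {
    AdamWOptimizer opt(0.001, 0.9, 0.999, 1e-8, 0.01);
    Layer layer(3, 2, "relu", opt);

    std::vector<std::vector<double>> x = {{0.1, 0.2, 0.3}};      // batch of one input
    std::vector<std::vector<double>> out = layer.forward(x);     // inputs and pre-activations are cached for backward

    std::vector<std::vector<double>> grad_out = {{1.0, -1.0}};   // hypothetical upstream gradient
    double grad_clip = 1.0;
    layer.backward(grad_out, grad_clip);   // accumulates grads_weights / grads_biases
    layer.update_weights();                // AdamW step on weights and biases
    layer.reset_gradients();               // clear accumulators for the next batch
}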

class NeuralNetwork {
public:
    vector<Layer> layers;

    void add_layer(const Layer& layer) {
        layers.push_back(layer);
    }

    vector<vector<double>> forward(const vector<vector<double>>& inputs) {
        vector<vector<double>> out = inputs;
        for (auto& layer : layers) {
            out = layer.forward(out);
        }
        return out;
    }

    void backward(const vector<vector<double>>& grad_output, double& grad_clip) {
        vector<vector<double>> grad = grad_output;
        for (auto it = layers.rbegin(); it != layers.rend(); ++it) {
            grad = it->backward(grad, grad_clip);
        }
    }

    void update_weights() {
        for (auto& layer : layers) {
            layer.update_weights();
            layer.reset_gradients();
        }
    }

    void softUpdate(NeuralNetwork& targetNetwork, double tau) {
        assert(layers.size() == targetNetwork.layers.size());

        // Update each layer in the target network
        for (size_t i = 0; i < layers.size(); ++i) {
            Layer& localLayer = layers[i];
            Layer& targetLayer = targetNetwork.layers[i];

            // Update weights and biases for each layer
            for (size_t j = 0; j < localLayer.weights.size(); ++j) {
                for (size_t k = 0; k < localLayer.weights[j].size(); ++k) {
                    targetLayer.weights[j][k] = tau * localLayer.weights[j][k] + (1 - tau) * targetLayer.weights[j][k];
                }
            }

            for (size_t j = 0; j < localLayer.biases.size(); ++j) {
                targetLayer.biases[j] = tau * localLayer.biases[j] + (1 - tau) * targetLayer.biases[j];
            }
        }
    }

    // Serialize the entire neural network to a file
    void save(const string& filename) const {
        ofstream outFile(filename);
        if (!outFile) {
            cerr << "Error: Could not open file for writing: " << filename << endl;
            return;
        }

        // Save the number of layers
        outFile << layers.size() << "\n";

        // Save each layer
        for (const Layer& layer : layers) {
            outFile << layer.weights.size() << " " << layer.weights[0].size() << " " << layer.activation << "\n";
            layer.save(outFile);
        }

        outFile.close();
    }

    // Deserialize the entire neural network from a file
    void load(const string& filename) {
        ifstream inFile(filename);
        if (!inFile) {
            cerr << "Error: Could not open file for reading: " << filename << endl;
            throw std::invalid_argument("Make sure that the file with that name exists. If it is the first generation, set the enumerated boolean 'E_NEW_NET' to 'true'");
        }

        // Load the number of layers
        size_t numLayers;
        inFile >> numLayers;

        layers.clear(); // Clear existing layers

        // Load each layer
        for (size_t i = 0; i < numLayers; ++i) {
            int input_dim, output_dim;
            string activation;
            inFile >> input_dim >> output_dim >> activation;

            AdamWOptimizer optimizer(0.001, 0.9, 0.999, 1e-8, 0.01); // Placeholder hyperparameters; overwritten by the saved optimizer state below
            Layer layer(input_dim, output_dim, activation, optimizer);
            layer.load(inFile);
            layers.push_back(layer);
        }

        inFile.close();
    }
};
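
// Example usage sketch: assembling a small two-layer network, running a forward pass, and
// soft-updating a target copy. The layer sizes, tau, and file name are illustrative placeholders.
void networkExample() {
    AdamWOptimizer opt(0.001, 0.9, 0.999, 1e-8, 0.01);

    NeuralNetwork qNetwork;
    qNetwork.add_layer(Layer(4, 16, "relu", opt));     // hidden layer
    qNetwork.add_layer(Layer(16, 2, "linear", opt));   // output layer (identity activation)

    NeuralNetwork targetNetwork = qNetwork;            // start the target as a copy of the online network

    std::vector<std::vector<double>> state = {{0.1, 0.2, 0.3, 0.4}};
    std::vector<std::vector<double>> qValues = qNetwork.forward(state);
    (void)qValues;

    qNetwork.softUpdate(targetNetwork, 0.005);         // Polyak averaging with tau = 0.005
    qNetwork.save("q_network.txt");                    // human-readable checkpoint
}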

// Gradient ascent to update parameters
void gradientAscent(std::vector<double>& params, const std::vector<double>& gradients, double learningRate) {
    std::transform(params.begin(), params.end(), gradients.begin(), params.begin(), [learningRate](double p, double g) {
        return p + learningRate * g;
    });
}

// Compute the policy-gradient objective: the sum of log-probabilities weighted by their advantages
// (the quantity maximized by gradientAscent above)
double computeLoss(const std::vector<double>& logProbs, const std::vector<double>& advantages) {
    double loss = 0.0;
    for (size_t i = 0; i < logProbs.size(); ++i) {
        loss += logProbs[i] * advantages[i];
    }
    return loss;
}

// Compute the advantage using the reward-to-go method
double computeAdvantage(const std::vector<double>& rewards, int t, double gamma) {
    double advantage = 0.0;
    double discount = 1.0;
    for (size_t i = t; i < rewards.size(); ++i) {
        advantage += discount * rewards[i];
        discount *= gamma;
    }
    return advantage;
}

// Function to safely compute log values with clipping
inline float safeLog(float x, float min_value = 1e-6f) {
    // Clip the input x to avoid log of zero or negative numbers
    float clipped_x = std::max(x, min_value);
    return std::log(clipped_x);
}
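
// Example usage sketch: tying the helpers above together for one REINFORCE-style update of a
// small parameter vector. The rewards, action probabilities, gamma, gradients, and learning
// rate are illustrative placeholders.
void policyGradientExample() {
    std::vector<double> rewards     = {1.0, 0.0, 1.0};
    std::vector<double> actionProbs = {0.6, 0.2, 0.7};   // probability of each chosen action
    double gamma = 0.99;

    std::vector<double> logProbs(actionProbs.size());
    std::vector<double> advantages(rewards.size());
    for (size_t t = 0; t < rewards.size(); ++t) {
        logProbs[t]   = safeLog(static_cast<float>(actionProbs[t]));
        advantages[t] = computeAdvantage(rewards, static_cast<int>(t), gamma);
    }

    double objective = computeLoss(logProbs, advantages);   // maximized by gradient ascent
    (void)objective;

    // Hypothetical policy parameters and their gradients (same length), then one ascent step.
    std::vector<double> params    = {0.1, -0.2, 0.3};
    std::vector<double> gradients = {0.05, 0.01, -0.02};
    gradientAscent(params, gradients, 0.01);
}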