Advertisement
STANAANDREY

mlp

Nov 5th, 2024
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 8.59 KB | None | 0 0
  1. #include <iostream>
  2. #include <vector>
  3. #include <thread>
  4. #include <functional>
  5. #include <cmath>
  6. #include <random>
  7. #include <mutex>
  8. #include <cassert>
  9.  
  10. // Sigmoid activation function and its derivative
  11. double sigmoid(double x) {
  12.     return 1.0 / (1.0 + std::exp(-x));
  13. }
  14.  
  15. double sigmoid_derivative(double x) {
  16.     double s = sigmoid(x);
  17.     return s * (1 - s);
  18. }
  19.  
  20. // Mean Squared Error loss function and its derivative
  21. double mse(const std::vector<double>& targets, const std::vector<double>& outputs) {
  22.     assert(targets.size() == outputs.size());
  23.     double sum = 0.0;
  24.     for (size_t i = 0; i < targets.size(); ++i) {
  25.         double diff = targets[i] - outputs[i];
  26.         sum += diff * diff;
  27.     }
  28.     return sum / targets.size();
  29. }
  30.  
  31. class Neuron {
  32. public:
  33.     std::vector<double> weights;
  34.     double bias;
  35.  
  36.     double last_input_sum; // z = w·x + b
  37.     double last_output;    // sigmoid(z)
  38.  
  39.     Neuron(int num_inputs) {
  40.         std::random_device rd;
  41.         std::mt19937 gen(rd());
  42.         std::uniform_real_distribution<> dis(-1, 1);
  43.         for (int i = 0; i < num_inputs; ++i) {
  44.             weights.push_back(dis(gen));
  45.         }
  46.         bias = dis(gen);
  47.     }
  48.  
  49.     double compute_output(const std::vector<double>& inputs) {
  50.         last_input_sum = bias;
  51.         for (size_t i = 0; i < weights.size(); ++i) {
  52.             last_input_sum += weights[i] * inputs[i];
  53.         }
  54.         last_output = sigmoid(last_input_sum);
  55.         return last_output;
  56.     }
  57. };
  58.  
  59. class Layer {
  60. public:
  61.     std::vector<Neuron> neurons;
  62.     std::vector<double> outputs; // Outputs after activation
  63.     std::vector<double> inputs;  // Inputs to this layer
  64.  
  65.     // Initialize layer with number of neurons and inputs per neuron
  66.     Layer(int num_neurons, int num_inputs) {
  67.         for (int i = 0; i < num_neurons; ++i) {
  68.             neurons.emplace_back(num_inputs);
  69.         }
  70.     }
  71.  
  72.     // Compute layer outputs using multithreading
  73.     std::vector<double> compute_outputs(const std::vector<double>& input) {
  74.         inputs = input; // Store input for backpropagation
  75.         outputs.resize(neurons.size(), 0.0);
  76.         std::vector<std::thread> threads;
  77.  
  78.         // Mutex for synchronized writing to outputs vector
  79.         // Not strictly necessary here since each thread writes to a unique index
  80.         // But kept for safety
  81.         std::mutex output_mutex;
  82.  
  83.         // Lambda function to compute a single neuron's output
  84.         auto compute_neuron = [&](int index) {
  85.             double out = neurons[index].compute_output(input);
  86.             // Protect writing to the outputs vector
  87.             {
  88.                 // std::lock_guard<std::mutex> lock(output_mutex);
  89.                 // Since each thread writes to a unique index, mutex can be omitted
  90.                 outputs[index] = out;
  91.             }
  92.         };
  93.  
  94.         // Launch a thread for each neuron in the layer
  95.         for (size_t i = 0; i < neurons.size(); ++i) {
  96.             threads.emplace_back(compute_neuron, i);
  97.         }
  98.  
  99.         // Join all threads
  100.         for (auto& th : threads) {
  101.             th.join();
  102.         }
  103.  
  104.         return outputs;
  105.     }
  106. };
  107.  
  108. // MLP class
  109. class MLP {
  110. public:
  111.     std::vector<Layer> layers;
  112.  
  113.     // For storing output errors during backpropagation
  114.     std::vector<std::vector<double>> layer_deltas;
  115.  
  116.     // Initialize MLP with layer sizes (e.g., {2, 4, 1})
  117.     MLP(const std::vector<int>& layer_sizes) {
  118.         if (layer_sizes.size() < 2) {
  119.             throw std::invalid_argument("MLP must have at least input and output layers");
  120.         }
  121.         for (size_t i = 1; i < layer_sizes.size(); ++i) {
  122.             layers.emplace_back(Layer(layer_sizes[i], layer_sizes[i - 1]));
  123.         }
  124.     }
  125.  
  126.     // feed_forward pass: compute output given input
  127.     std::vector<double> feed_forward(const std::vector<double>& input) {
  128.         std::vector<double> activation = input;
  129.         for (auto& layer : layers) {
  130.             activation = layer.compute_outputs(activation);
  131.         }
  132.         return activation;
  133.     }
  134.  
  135.     // Backward pass: backpropagation to compute gradients
  136.     void backward(const std::vector<double>& target, double learning_rate) {
  137.         // Calculate delta for output layer
  138.         int num_layers = layers.size();
  139.         layer_deltas.resize(num_layers, std::vector<double>());
  140.         // Start from the output layer and move backwards
  141.  
  142.         // Output layer
  143.         for (size_t i = 0; i < layers.back().neurons.size(); ++i) {
  144.             double output = layers.back().outputs[i];
  145.             double error = target[i] - output;
  146.             double delta = error * sigmoid_derivative(layers.back().neurons[i].last_input_sum);
  147.             layer_deltas.back().push_back(delta);
  148.         }
  149.  
  150.         // Hidden layers
  151.         for (int l = num_layers - 2; l >= 0; --l) {
  152.             for (size_t i = 0; i < layers[l].neurons.size(); ++i) {
  153.                 double error = 0.0;
  154.                 // Sum the delta from the next layer multiplied by the corresponding weight
  155.                 for (size_t j = 0; j < layers[l + 1].neurons.size(); ++j) {
  156.                     error += layer_deltas[l + 1][j] * layers[l + 1].neurons[j].weights[i];
  157.                 }
  158.                 double delta = error * sigmoid_derivative(layers[l].neurons[i].last_input_sum);
  159.                 layer_deltas[l].push_back(delta);
  160.             }
  161.         }
  162.  
  163.         // Update weights and biases
  164.         for (size_t l = 0; l < layers.size(); ++l) {
  165.             // Determine inputs to this layer
  166.             std::vector<double> inputs;
  167.             if (l == 0) {
  168.                 inputs = layers[l].inputs; // Inputs stored during feed_forward pass
  169.             } else {
  170.                 inputs = layers[l - 1].outputs;
  171.             }
  172.  
  173.             for (size_t n = 0; n < layers[l].neurons.size(); ++n) {
  174.                 // Update weights
  175.                 for (size_t w = 0; w < layers[l].neurons[n].weights.size(); ++w) {
  176.                     layers[l].neurons[n].weights[w] += learning_rate * layer_deltas[l][n] * inputs[w];
  177.                 }
  178.                 // Update bias
  179.                 layers[l].neurons[n].bias += learning_rate * layer_deltas[l][n];
  180.             }
  181.         }
  182.  
  183.         // Clear deltas for next iteration
  184.         for (auto& delta_layer : layer_deltas) {
  185.             delta_layer.clear();
  186.         }
  187.     }
  188.  
  189.     // Train the network on a single sample
  190.     void train_sample(const std::vector<double>& input, const std::vector<double>& target, double learning_rate) {
  191.         feed_forward(input);
  192.         backward(target, learning_rate);
  193.     }
  194.  
  195.     // Predict output for a given input
  196.     std::vector<double> predict(const std::vector<double>& input) {
  197.         return feed_forward(input);
  198.     }
  199. };
  200.  
  201. void print_vector(const std::vector<double>& vec) {
  202.     std::cout << "[";
  203.     for (size_t i = 0; i < vec.size(); ++i) {
  204.         printf("%.4f", vec[i]);
  205.         if (i != vec.size() - 1)
  206.             std::cout << ", ";
  207.     }
  208.     std::cout << "]";
  209. }
  210.  
  211. int main() {
  212.     // Network architecture: 2-input, 4 hidden neurons, 1-output
  213.     std::vector<int> layer_sizes = {2, 4, 1};
  214.     MLP mlp(layer_sizes);
  215.  
  216.     // XOR dataset
  217.     std::vector<std::vector<double>> inputs = {
  218.         {0.0, 0.0},
  219.         {0.0, 1.0},
  220.         {1.0, 0.0},
  221.         {1.0, 1.0}
  222.     };
  223.  
  224.     std::vector<std::vector<double>> targets = {
  225.         {0.0},
  226.         {1.0},
  227.         {1.0},
  228.         {0.0}
  229.     };
  230.  
  231.     double learning_rate = 0.5;
  232.     int epochs = 20000;
  233.  
  234.     // Training loop
  235.     for (int epoch = 0; epoch < epochs; ++epoch) {
  236.         double total_loss = 0.0;
  237.         for (size_t i = 0; i < inputs.size(); ++i) {
  238.             mlp.train_sample(inputs[i], targets[i], learning_rate);
  239.             std::vector<double> output = mlp.predict(inputs[i]);
  240.             total_loss += mse(targets[i], output);
  241.         }
  242.         total_loss /= inputs.size();
  243.  
  244.         if ((epoch + 1) % 2000 == 0) {
  245.             std::cout << "Epoch " << epoch + 1 << " - Loss: " << total_loss << std::endl;
  246.         }
  247.  
  248.         if (total_loss < 0.001) {
  249.             std::cout << "Early stopping at epoch " << epoch + 1 << " with loss " << total_loss << std::endl;
  250.             break;
  251.         }
  252.     }
  253.  
  254.     std::cout << "\nTesting the trained network on XOR inputs:\n";
  255.     for (size_t i = 0; i < inputs.size(); ++i) {
  256.         std::vector<double> output = mlp.predict(inputs[i]);
  257.         std::cout << "Input: ";
  258.         print_vector(inputs[i]);
  259.         std::cout << " - Predicted Output: ";
  260.         print_vector(output);
  261.         std::cout << " - Target: ";
  262.         print_vector(targets[i]);
  263.         std::cout << std::endl;
  264.     }
  265.  
  266.     return 0;
  267. }
  268.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement