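// Multithreaded multilayer perceptron (MLP) trained with backpropagation on the XOR problem.
// Each layer evaluates its neurons in parallel, one std::thread per neuron.
// Build sketch (assuming g++ or clang on a POSIX system; the file name mlp_xor.cpp is just
// a placeholder): g++ -std=c++17 -O2 -pthread mlp_xor.cpp -o mlp_xor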
#include <iostream>
#include <iomanip>
#include <vector>
#include <thread>
#include <functional>
#include <cmath>
#include <random>
#include <cassert>
#include <stdexcept>
// Sigmoid activation function and its derivative
double sigmoid(double x) {
    return 1.0 / (1.0 + std::exp(-x));
}

double sigmoid_derivative(double x) {
    double s = sigmoid(x);
    return s * (1.0 - s);
}
// Mean Squared Error loss function
double mse(const std::vector<double>& targets, const std::vector<double>& outputs) {
    assert(targets.size() == outputs.size());
    double sum = 0.0;
    for (size_t i = 0; i < targets.size(); ++i) {
        double diff = targets[i] - outputs[i];
        sum += diff * diff;
    }
    return sum / targets.size();
}
class Neuron {
public:
    std::vector<double> weights;
    double bias;
    double last_input_sum; // z = w·x + b
    double last_output;    // sigmoid(z)

    // Randomly initialize weights and bias uniformly in [-1, 1]
    Neuron(int num_inputs) {
        std::random_device rd;
        std::mt19937 gen(rd());
        std::uniform_real_distribution<> dis(-1.0, 1.0);
        for (int i = 0; i < num_inputs; ++i) {
            weights.push_back(dis(gen));
        }
        bias = dis(gen);
    }

    // Weighted sum plus bias passed through the sigmoid; caches z and the activation
    double compute_output(const std::vector<double>& inputs) {
        last_input_sum = bias;
        for (size_t i = 0; i < weights.size(); ++i) {
            last_input_sum += weights[i] * inputs[i];
        }
        last_output = sigmoid(last_input_sum);
        return last_output;
    }
};
class Layer {
public:
    std::vector<Neuron> neurons;
    std::vector<double> outputs; // Outputs after activation
    std::vector<double> inputs;  // Inputs to this layer

    // Initialize layer with number of neurons and inputs per neuron
    Layer(int num_neurons, int num_inputs) {
        for (int i = 0; i < num_neurons; ++i) {
            neurons.emplace_back(num_inputs);
        }
    }

    // Compute layer outputs using multithreading: one thread per neuron.
    // No mutex is needed because each thread writes to a distinct index of `outputs`,
    // which is sized before any thread is launched.
    std::vector<double> compute_outputs(const std::vector<double>& input) {
        inputs = input; // Store input for backpropagation
        outputs.resize(neurons.size(), 0.0);
        std::vector<std::thread> threads;

        // Lambda that computes a single neuron's output
        auto compute_neuron = [&](size_t index) {
            outputs[index] = neurons[index].compute_output(input);
        };

        // Launch a thread for each neuron in the layer
        for (size_t i = 0; i < neurons.size(); ++i) {
            threads.emplace_back(compute_neuron, i);
        }
        // Join all threads before returning
        for (auto& th : threads) {
            th.join();
        }
        return outputs;
    }
};
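// Note: spawning one thread per neuron is meant to illustrate the threading pattern,
// not to be fast. For layers this small, creating and joining threads typically costs
// more than the dot products themselves; a plain loop or a reusable thread pool would
// be the usual choice in practice.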
// MLP class
class MLP {
public:
    std::vector<Layer> layers;
    // Per-layer deltas (error terms) computed during backpropagation
    std::vector<std::vector<double>> layer_deltas;

    // Initialize MLP with layer sizes (e.g., {2, 4, 1})
    MLP(const std::vector<int>& layer_sizes) {
        if (layer_sizes.size() < 2) {
            throw std::invalid_argument("MLP must have at least input and output layers");
        }
        for (size_t i = 1; i < layer_sizes.size(); ++i) {
            layers.emplace_back(layer_sizes[i], layer_sizes[i - 1]);
        }
    }

    // Forward pass: compute the network output for a given input
    std::vector<double> feed_forward(const std::vector<double>& input) {
        std::vector<double> activation = input;
        for (auto& layer : layers) {
            activation = layer.compute_outputs(activation);
        }
        return activation;
    }
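
    // Backpropagation recap for the implementation below (describes this code, not an
    // external reference):
    //   output layer:  delta_i = (target_i - out_i) * sigmoid'(z_i)
    //   hidden layers: delta_i = (sum_j delta_j_next * w_ji_next) * sigmoid'(z_i)
    //   updates:       w += learning_rate * delta * input,  b += learning_rate * delta
    // The constant factor from the MSE derivative is absorbed into the learning rate,
    // and because the error is written as (target - output), the updates use +=, which
    // is equivalent to gradient descent on the MSE.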
    // Backward pass: backpropagation followed by a gradient-descent weight update
    void backward(const std::vector<double>& target, double learning_rate) {
        assert(target.size() == layers.back().neurons.size());
        int num_layers = static_cast<int>(layers.size());
        layer_deltas.resize(num_layers);

        // Output layer: deltas computed directly from the loss
        for (size_t i = 0; i < layers.back().neurons.size(); ++i) {
            double output = layers.back().outputs[i];
            double error = target[i] - output;
            double delta = error * sigmoid_derivative(layers.back().neurons[i].last_input_sum);
            layer_deltas.back().push_back(delta);
        }

        // Hidden layers, moving backwards from the second-to-last layer
        for (int l = num_layers - 2; l >= 0; --l) {
            for (size_t i = 0; i < layers[l].neurons.size(); ++i) {
                double error = 0.0;
                // Sum the next layer's deltas weighted by the connections from neuron i
                for (size_t j = 0; j < layers[l + 1].neurons.size(); ++j) {
                    error += layer_deltas[l + 1][j] * layers[l + 1].neurons[j].weights[i];
                }
                double delta = error * sigmoid_derivative(layers[l].neurons[i].last_input_sum);
                layer_deltas[l].push_back(delta);
            }
        }

        // Update weights and biases using the deltas and the inputs each layer saw
        for (size_t l = 0; l < layers.size(); ++l) {
            // Inputs to this layer were stored during the forward pass
            const std::vector<double>& inputs = layers[l].inputs;
            for (size_t n = 0; n < layers[l].neurons.size(); ++n) {
                // Update weights
                for (size_t w = 0; w < layers[l].neurons[n].weights.size(); ++w) {
                    layers[l].neurons[n].weights[w] += learning_rate * layer_deltas[l][n] * inputs[w];
                }
                // Update bias
                layers[l].neurons[n].bias += learning_rate * layer_deltas[l][n];
            }
        }

        // Clear deltas for the next iteration
        for (auto& delta_layer : layer_deltas) {
            delta_layer.clear();
        }
    }

    // Train the network on a single sample (one online gradient-descent step)
    void train_sample(const std::vector<double>& input, const std::vector<double>& target, double learning_rate) {
        feed_forward(input);
        backward(target, learning_rate);
    }

    // Predict output for a given input
    std::vector<double> predict(const std::vector<double>& input) {
        return feed_forward(input);
    }
};
void print_vector(const std::vector<double>& vec) {
    std::cout << "[";
    for (size_t i = 0; i < vec.size(); ++i) {
        std::cout << std::fixed << std::setprecision(4) << vec[i];
        if (i != vec.size() - 1)
            std::cout << ", ";
    }
    std::cout << "]";
}
int main() {
    // Network architecture: 2 inputs, 4 hidden neurons, 1 output
    std::vector<int> layer_sizes = {2, 4, 1};
    MLP mlp(layer_sizes);

    // XOR dataset
    std::vector<std::vector<double>> inputs = {
        {0.0, 0.0},
        {0.0, 1.0},
        {1.0, 0.0},
        {1.0, 1.0}
    };
    std::vector<std::vector<double>> targets = {
        {0.0},
        {1.0},
        {1.0},
        {0.0}
    };

    double learning_rate = 0.5;
    int epochs = 20000;

    // Training loop
    for (int epoch = 0; epoch < epochs; ++epoch) {
        double total_loss = 0.0;
        for (size_t i = 0; i < inputs.size(); ++i) {
            mlp.train_sample(inputs[i], targets[i], learning_rate);
            std::vector<double> output = mlp.predict(inputs[i]);
            total_loss += mse(targets[i], output);
        }
        total_loss /= inputs.size();
        if ((epoch + 1) % 2000 == 0) {
            std::cout << "Epoch " << epoch + 1 << " - Loss: " << total_loss << std::endl;
        }
        if (total_loss < 0.001) {
            std::cout << "Early stopping at epoch " << epoch + 1 << " with loss " << total_loss << std::endl;
            break;
        }
    }

    std::cout << "\nTesting the trained network on XOR inputs:\n";
    for (size_t i = 0; i < inputs.size(); ++i) {
        std::vector<double> output = mlp.predict(inputs[i]);
        std::cout << "Input: ";
        print_vector(inputs[i]);
        std::cout << " - Predicted Output: ";
        print_vector(output);
        std::cout << " - Target: ";
        print_vector(targets[i]);
        std::cout << std::endl;
    }
    return 0;
}
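
// Note: convergence on XOR depends on the random initialization; occasionally the
// network settles near a poor local minimum and the loss stalls above the 0.001
// early-stopping threshold. Re-running the program (or adjusting the learning rate)
// usually resolves this.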