From c77278dc5fc7a61e3b20ffdd6f580809ad8b847a Mon Sep 17 00:00:00 2001
From: Alfred Andersson <alfan961@student.liu.se>
Date: Mon, 4 Dec 2023 20:28:17 +0000
Subject: [PATCH] Neural network

---
 ActivationFunction.cc |  38 ++++++++++
 ActivationFunction.h  |  25 ++++++
 Layer.cc              | 145 ++++++++++++++++++++++++++++++++++++++++++
 Layer.h               |  32 ++++++++++
 Matrix.cc             |  91 ++++++++++++++++++++++++++
 Matrix.h              |  35 ++++++++++
 NeuralNetwork.cc      | 119 +++++++++++++++++++++++++++++++++++
 NeuralNetwork.h       |  33 ++++++++++
 Neuron.cc             |  64 +++++++++++++++++++
 Neuron.h              |  38 +++++++++++
 TrainingData.cc       |  13 ++++
 TrainingData.h        |  24 ++++++++
 main.cc               |  86 ++++++++++++++++++++++++
 13 files changed, 743 insertions(+)
 create mode 100644 ActivationFunction.cc
 create mode 100644 ActivationFunction.h
 create mode 100644 Layer.cc
 create mode 100644 Layer.h
 create mode 100644 Matrix.cc
 create mode 100644 Matrix.h
 create mode 100644 NeuralNetwork.cc
 create mode 100644 NeuralNetwork.h
 create mode 100644 Neuron.cc
 create mode 100644 Neuron.h
 create mode 100644 TrainingData.cc
 create mode 100644 TrainingData.h
 create mode 100644 main.cc

diff --git a/ActivationFunction.cc b/ActivationFunction.cc
new file mode 100644
index 0000000..0310b46
--- /dev/null
+++ b/ActivationFunction.cc
@@ -0,0 +1,38 @@
+double ActivationFunction::Linear(double v)
+{
+    if(v > 1)
+        return 1.0;
+
+    if(v < 0)
+        return 0.0;
+
+    return v;
+}
+
+double ActivationFunction::ReLU(double v)
+{
+    return std::max(0.0, v);
+}
+
+double ActivationFunction::Sigmoid(double v)
+{
+    return 1.0 / (1.0 + std::exp(-v));
+}
+
+double ActivationFunction::Linear_der(double v)
+{
+    return 1.0; // treated as the identity; the clamp in Linear() is ignored here
+}
+
+double ActivationFunction::ReLU_der(double v)
+{
+    if(std::max(0.0, v) == 0.0)
+        return 0.0;
+
+    return 1.0;
+}
+
+double ActivationFunction::Sigmoid_der(double v)
+{
+    return Sigmoid(v) * (1 - Sigmoid(v)); // s'(z) = s(z) * (1 - s(z)); takes the pre-activation z
+}
\ No newline at end of file
diff --git a/ActivationFunction.h b/ActivationFunction.h
new file mode 100644
index 0000000..4077c9b
--- /dev/null
+++ b/ActivationFunction.h
@@ -0,0 +1,25 @@
+#ifndef ACTIVATIONFUNCTION_H
+#define ACTIVATIONFUNCTION_H
+
+#include <functional>
+#include <algorithm>
+#include <cmath>
+
+namespace ActivationFunction
+{
+
+    typedef std::function<double(double)> ActFunc;
+
+    double Linear(double);
+    double ReLU(double);
+    double Sigmoid(double);
+
+    double Linear_der(double);
+    double ReLU_der(double);
+    double Sigmoid_der(double);
+
+}
+
+#include "ActivationFunction.cc"
+
+#endif // ACTIVATIONFUNCTION_H
\ No newline at end of file
diff --git a/Layer.cc b/Layer.cc
new file mode 100644
index 0000000..10065c5
--- /dev/null
+++ b/Layer.cc
@@ -0,0 +1,145 @@
+Layer::Layer(int n_neurons, int prev_n_neurons, std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : neurons{}, weights{}
+{
+    for(int i = 0; i < n_neurons; i++)
+    {
+        this->neurons.push_back(new Neuron(act_func, deriv_func));
+    }
+
+    if(n_neurons > 0 && prev_n_neurons > 0)
+    {
+        this->weights = new Matrix(n_neurons, prev_n_neurons);
+        this->weights->randomize();
+        //std::cout << this->weights << std::endl;
+    }
+    else // input layer: one fixed pass-through weight per neuron, no bias
+    {
+        this->weights = new Matrix(n_neurons, 1);
+        //std::cout << this->weights << std::endl;
+        for(Neuron* n : this->neurons)
+        {
+            n->setBias(0.0);
+        }
+    }
+}
+
+Layer::~Layer()
+{
+    for(Neuron* n : this->neurons)
+    {
+        delete n;
+    }
+    delete this->weights;
+}
+
+void Layer::setVals(std::vector<double> vals)
+{
+    for(int i = 0; i < this->neurons.size(); i++)
+    {
+        this->neurons[i]->setVal(vals[i]);
+    }
+}
+
+Matrix* Layer::getWeights() const
+{
+    return this->weights;
+}
+
+void Layer::randomizeVals()
+{
+    this->weights->randomize();
+    for(Neuron* n : this->neurons)
+    {
+        n->randomizeBias();
+    }
+}
+
+std::vector<double> Layer::feedForward(std::vector<double> inputs)
+{
+    std::vector<double> result{};
+
+    if(this->weights->getCols() == 1) // cols == 1 => input layer
+    {
+        for(int i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate({inputs[i]}, this->weights->getRow(i)));
+        }
+    }
+    else
+    {
+        for(int i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate(inputs, this->weights->getRow(i)));
+        }
+    }
+    return result;
+}
+
+void Layer::backpropagate(double y_expected, double y_predicted, double loss, double learning_rate, bool output_layer, Layer* next_layer)
+{
+    if(output_layer)
+    {
+        double loss_yhat = ((1 - y_expected) / (1 - y_predicted)) - (y_expected / y_predicted); // dL/dy^ = (1 - y) / (1 - y^) - y / y^
+        double yhat_z = y_predicted * (1 - y_predicted); // sigmoid derivative at the output
+        std::vector<double> z_w = this->neurons[0]->getInputs(); // assumes a single output neuron
+
+        std::vector<double> gradient{};
+        double bias_delta = learning_rate * loss_yhat * yhat_z;
+        for(double d : z_w)
+        {
+            gradient.push_back(loss_yhat * yhat_z * d);
+        }
+
+        this->neurons[0]->adjustBias(bias_delta);
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(int r = 0; r < w.size(); r++)
+        {
+            for(int c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+    else
+    {
+        double loss_yhat = (1 - y_expected) / (1 - y_predicted) - (y_expected / y_predicted); // dL/dy^ = (1 - y) / (1 - y^) - y / y^
+        double yhat_z = y_predicted * (1 - y_predicted);
+        std::vector<double> z_a = next_layer->getWeights()->getVals()[0]; // weights of the layer closer to the output; assumes it holds a single neuron
+        std::vector<double> a_z{};
+        for(Neuron* n : this->neurons)
+        {
+            a_z.push_back(n->getDerivative());
+        }
+
+        std::vector<std::vector<double>> gradient{};
+        for(int r = 0; r < this->weights->getRows(); r++)
+        {
+            std::vector<double> grad_row{};
+            double loss_z = loss_yhat * yhat_z * z_a[r] * a_z[r];
+            double bias_delta = learning_rate * loss_z;
+            this->neurons[r]->adjustBias(bias_delta);
+            std::vector<double> inputs = this->neurons[r]->getInputs();
+            for(int c = 0; c < this->weights->getCols(); c++)
+            {
+                grad_row.push_back(loss_z * inputs[c]);
+            }
+            gradient.push_back(grad_row);
+        }
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(int r = 0; r < w.size(); r++)
+        {
+            for(int c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[r][c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+}
diff --git a/Layer.h b/Layer.h
new file mode 100644
index 0000000..288f2e1
--- /dev/null
+++ b/Layer.h
@@ -0,0 +1,32 @@
+#ifndef LAYER_H
+#define LAYER_H
+
+#include <iostream>
+#include <vector>
+#include <functional>
+#include "Neuron.h"
+#include "Matrix.h"
+
+class Layer
+{
+public:
+    Layer(int, int, std::function<double(double)>, std::function<double(double)>);
+    ~Layer();
+
+    void setVals(std::vector<double>);
+
+    Matrix* getWeights() const;
+    void randomizeVals();
+
+    std::vector<double> feedForward(std::vector<double>);
+    void backpropagate(double, double, double, double, bool, Layer*);
+
+private:
+    std::vector<Neuron*> neurons;
+    Matrix* weights;
+};
+
+#include "Layer.cc"
+
+#endif // LAYER_H
+
diff --git a/Matrix.cc b/Matrix.cc
new file mode 100644
index 0000000..9437142
--- /dev/null
+++ b/Matrix.cc
@@ -0,0 +1,91 @@
+Matrix::Matrix(int rows, int cols)
+    : rows{rows}, cols{cols}, vals{}
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        std::vector<double> row;
+        for(int c = 0; c < this->cols; c++)
+        {
+            row.push_back(1.0);
+        }
+        this->vals.push_back(row);
+    }
+}
+
+Matrix::~Matrix()
+{}
+
+std::vector<std::vector<double>> Matrix::getVals() const
+{
+    return this->vals;
+}
+
+void Matrix::setVals(std::vector<std::vector<double>> v)
+{
+    this->vals = v;
+}
+
+std::vector<double> Matrix::getRow(int index) const
+{
+    return this->vals[index];
+}
+
+int Matrix::getRows() const
+{
+    return this->rows;
+}
+
+int Matrix::getCols() const
+{
+    return this->cols;
+}
+
+void Matrix::randomize()
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        for(int c = 0; c < this->cols; c++)
+        {
+            this->vals[r][c] = (rand() % 10000) / 10000.0; // uniform-ish value in [0, 1)
+            // this->vals[r][c] -= 0.5;
+        }
+    }
+}
+
+void Matrix::transpose()
+{
+    std::vector<std::vector<double>> new_vals{};
+
+    for(int c = 0; c < this->cols; c++)
+    {
+        std::vector<double> row;
+        for(int r = 0; r < this->rows; r++)
+        {
+            row.push_back(this->vals[r][c]);
+        }
+        new_vals.push_back(row);
+    }
+
+    this->vals = new_vals;
+    int temp_r = this->rows;
+    this->rows = this->cols;
+    this->cols = temp_r;
+}
+
+std::ostream& operator<<(std::ostream& os, Matrix* m)
+{
+    std::vector<std::vector<double>> vals = m->getVals();
+
+    os << "Matrix: " << m->getRows() << " x " << m->getCols() << std::endl;
+
+    for(std::vector<double> v : vals)
+    {
+        for(double d : v)
+        {
+            os << std::fixed << std::setprecision(4) << "[" << d << "] ";
+        }
+        os << std::endl;
+    }
+
+    return os;
+}
\ No newline at end of file
diff --git a/Matrix.h b/Matrix.h
new file mode 100644
index 0000000..1dafe85
--- /dev/null
+++ b/Matrix.h
@@ -0,0 +1,35 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>
+
+class Matrix
+{
+public:
+    Matrix(int, int);
+    ~Matrix();
+
+    std::vector<std::vector<double>> getVals() const;
+    void setVals(std::vector<std::vector<double>>);
+    std::vector<double> getRow(int) const;
+
+    int getRows() const;
+    int getCols() const;
+
+    void randomize();
+    void transpose();
+
+private:
+    int rows;
+    int cols;
+    std::vector<std::vector<double>> vals;
+};
+
+std::ostream& operator<<(std::ostream&, Matrix*);
+
+#include "Matrix.cc"
+
+#endif // MATRIX_H
\ No newline at end of file
diff --git a/NeuralNetwork.cc b/NeuralNetwork.cc
new file mode 100644
index 0000000..21dceb8
--- /dev/null
+++ b/NeuralNetwork.cc
@@ -0,0 +1,119 @@
+NeuralNetwork::NeuralNetwork(std::vector<int> layer_sizes)
+    : layers{}
+{
+    auto it = layer_sizes.begin();
+
+    while(it != layer_sizes.end())
+    {
+        std::function<double(double)> act_func = &ActivationFunction::ReLU;
+        std::function<double(double)> deriv_func = &ActivationFunction::ReLU_der;
+
+        if(it == layer_sizes.begin()) // input layer passes values through
+        {
+            act_func = &ActivationFunction::Linear;
+            deriv_func = &ActivationFunction::Linear_der;
+        }
+        else if(it == layer_sizes.end() - 1) // output layer squashes to (0, 1)
+        {
+            act_func = &ActivationFunction::Sigmoid;
+            deriv_func = &ActivationFunction::Sigmoid_der;
+        }
+
+        if(it != layer_sizes.begin())
+            this->layers.push_back(new Layer(*it, *(it - 1), act_func, deriv_func));
+        else
+            this->layers.push_back(new Layer(*it, 0, act_func, deriv_func));
+        it++;
+    }
+}
+
+NeuralNetwork::~NeuralNetwork()
+{
+    for(Layer* l : this->layers)
+    {
+        delete l;
+    }
+}
+
+std::vector<Layer*> NeuralNetwork::getLayers() const
+{
+    return this->layers;
+}
+
+double NeuralNetwork::run(std::vector<double> inputs)
+{
+    std::vector<double> next_inputs = inputs;
+
+    auto it = this->layers.begin();
+    for(; it != this->layers.end(); it++)
+    {
+        next_inputs = (*it)->feedForward(next_inputs);
+    }
+
+    return next_inputs[0];
+
+}
+
+void NeuralNetwork::train(Dataset ds, double learning_rate)
+{
+    std::cout << "----- TRAINING -----" << std::endl;
+    double lowest_loss{9999.0};
+    int count{0};
+
+    while(lowest_loss > 0.00001) // run until the best observed average loss is small enough
+    {
+        double total_loss{};
+        double samples{0};
+
+        double y_expected{};
+        double y_predicted{};
+
+        double average_loss{learning_rate}; // also serves as the step size: average_loss * 10 is passed to backpropagate()
+        for(TrainingData* td : ds)
+        {
+            std::vector<double> inputs = td->getInputs();
+            y_expected = td->getOutputs()[0];
+            y_predicted = this->run(inputs);
+            double loss = y_expected * std::log(y_predicted) + (1 - y_expected) * std::log(1.0 - y_predicted); // y * log(y^) + (1 - y) * log(1 - y^)
+
+            total_loss += loss;
+            samples++;
+
+            auto it = this->layers.end() - 1;
+            for(; it != this->layers.begin(); it--)
+            {
+                (*it)->backpropagate(y_expected, y_predicted, -loss, average_loss * 10, it == this->layers.end() - 1, (it + 1 == this->layers.end()) ? nullptr : *(it + 1)); // last argument: the layer closer to the output, nullptr for the output layer itself
+            }
+        }
+
+        average_loss = -(1 / (2 * samples)) * total_loss; // negated mean log-likelihood (halved)
+
+        if(average_loss > lowest_loss + 0.02 && average_loss > 0.1) // loss diverged: restart with fresh random weights
+        {
+            for(int l = 1; l < this->layers.size(); l++)
+            {
+                this->layers[l]->randomizeVals();
+
+            }
+            lowest_loss = 9999; // reset the baseline after re-randomizing
+            //this->layers[1]->randomizeVals();
+            //this->layers[2]->randomizeVals();
+        }
+
+        if(count > 1000 && average_loss > 0.1)
+        {
+            // this->layers[1]->randomizeVals();
+            // this->layers[2]->randomizeVals();
+            count = 0;
+        }
+
+        count++;
+
+
+        if(std::abs(average_loss) < lowest_loss)
+            lowest_loss = average_loss;
+
+        //std::cout << std::fixed << std::setprecision(8) << "Loss: " << average_loss << std::endl;
+    }
+
+}
\ No newline at end of file
diff --git a/NeuralNetwork.h b/NeuralNetwork.h
new file mode 100644
index 0000000..a3c66ee
--- /dev/null
+++ b/NeuralNetwork.h
@@ -0,0 +1,33 @@
+#ifndef NEURALNETWORK_H
+#define NEURALNETWORK_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <initializer_list>
+#include <cmath>
+
+#include "Layer.h"
+#include "ActivationFunction.h"
+#include "Matrix.h"
+#include "TrainingData.h"
+
+class NeuralNetwork
+{
+public:
+    NeuralNetwork(std::vector<int>);
+    ~NeuralNetwork();
+
+    std::vector<Layer*> getLayers() const;
+
+    double run(std::vector<double>);
+
+    void train(Dataset, double);
+
+private:
+    std::vector<Layer*> layers;
+};
+
+#include "NeuralNetwork.cc"
+
+#endif // NEURALNETWORK_H
diff --git a/Neuron.cc b/Neuron.cc
new file mode 100644
index 0000000..09b7997
--- /dev/null
+++ b/Neuron.cc
@@ -0,0 +1,64 @@
+Neuron::Neuron(std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : z_val{}, a_val{}, bias{(rand() % 10000) / 10000.0}, activation_function{act_func}, derivative_function{deriv_func}
+{}
+
+Neuron::~Neuron()
+{}
+
+double Neuron::getVal() const
+{
+    return this->a_val;
+}
+
+void Neuron::setVal(double v)
+{
+    this->a_val = v;
+}
+
+double Neuron::getDerivative() const
+{
+    return this->derivative_function(this->z_val); // derivative functions take the pre-activation z
+}
+
+std::vector<double> Neuron::getInputs() const
+{
+    return this->inputs;
+}
+
+void Neuron::setBias(double v)
+{
+    this->bias = v;
+}
+
+void Neuron::adjustBias(double delta)
+{
+    this->bias -= delta;
+}
+
+void Neuron::randomizeBias()
+{
+    this->bias = (rand() % 10000) / 10000.0;
+}
+
+double Neuron::calculate(std::vector<double> inputs, std::vector<double> weights)
+{
+    if(inputs.size() != weights.size())
+    {
+        std::cerr << "ERROR: Inputs and weights are not the same size -> " << inputs.size() << " and " << weights.size() << std::endl;
+        return 0;
+    }
+
+    double z{0}; // sum of weighted inputs + bias
+
+    for(int i = 0; i < inputs.size(); i++)
+    {
+        z += inputs[i] * weights[i];
+    }
+
+    this->z_val = z + this->bias;
+    this->a_val = this->activation_function(this->z_val);
+    this->inputs = inputs;
+
+    return this->a_val;
+
+}
\ No newline at end of file
diff --git a/Neuron.h b/Neuron.h
new file mode 100644
index 0000000..58551d2
--- /dev/null
+++ b/Neuron.h
@@ -0,0 +1,38 @@
+#ifndef NEURON_H
+#define NEURON_H
+
+#include <functional>
+#include "Matrix.h"
+
+class Neuron
+{
+public:
+    Neuron(std::function<double(double)>, std::function<double(double)>);
+    ~Neuron();
+
+    double getVal() const;
+    void setVal(double);
+
+    double getDerivative() const;
+
+    std::vector<double> getInputs() const;
+
+    void setBias(double);
+    void adjustBias(double);
+    void randomizeBias();
+
+    double calculate(std::vector<double>, std::vector<double>);
+
+private:
+    double z_val; // weighted sum of inputs + bias
+    double a_val; // activated z_val
+    std::vector<double> inputs;
+    double bias;
+    std::function<double(double)> activation_function;
+    std::function<double(double)> derivative_function;
+
+};
+
+#include "Neuron.cc"
+
+#endif // NEURON_H
\ No newline at end of file
diff --git a/TrainingData.cc b/TrainingData.cc
new file mode 100644
index 0000000..f52f208
--- /dev/null
+++ b/TrainingData.cc
@@ -0,0 +1,13 @@
+TrainingData::TrainingData(std::vector<double> in, std::vector<double> out)
+    : inputs{in}, outputs{out}
+{}
+
+std::vector<double> TrainingData::getInputs() const
+{
+    return this->inputs;
+}
+
+std::vector<double> TrainingData::getOutputs() const
+{
+    return this->outputs;
+}
\ No newline at end of file
diff --git a/TrainingData.h b/TrainingData.h
new file mode 100644
index 0000000..5ee4efd
--- /dev/null
+++ b/TrainingData.h
@@ -0,0 +1,24 @@
+#ifndef TRAININGDATA_H
+#define TRAININGDATA_H
+
+#include <vector>
+
+class TrainingData
+{
+public:
+    TrainingData(std::vector<double>, std::vector<double>);
+
+    std::vector<double> getInputs() const;
+    std::vector<double> getOutputs() const;
+
+private:
+    std::vector<double> inputs;
+    std::vector<double> outputs;
+
+};
+
+typedef std::vector<TrainingData*> Dataset;
+
+#include "TrainingData.cc"
+
+#endif // TRAININGDATA_H
\ No newline at end of file
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000..acabe62
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,86 @@
+#include <iostream>
+#include <iomanip>
+#include <time.h>
+#include <string>
+
+#include "NeuralNetwork.h"
+#include "TrainingData.h"
+
+int main(int argc, char* argv[])
+{
+
+    if(argc < 3)
+    {
+        std::cerr << "ERROR: Enter sizes of at least 2 layers!" << std::endl;
+        std::cerr << "USAGE: " << argv[0] << " 2 4 1" << std::endl;
+        return 1;
+    }
+
+    std::vector<int> layer_sizes{};
+
+    for(int i = 1; i < argc; i++)
+    {
+        layer_sizes.push_back(atoi(argv[i]));
+    }
+
+    std::ios_base::sync_with_stdio(false);
+    srand(time(0));
+
+    NeuralNetwork* nn = new NeuralNetwork(layer_sizes);
+
+    double user_in{};
+
+    Dataset ds{};
+    int n_samples;
+
+    std::cout << "Enter number of samples for training dataset: ";
+    std::cin >> n_samples;
+    for(int i = 0; i < n_samples; i++)
+    {
+        std::cout << "Entering training data!" << std::endl;
+        std::vector<double> user_inputs{};
+        std::vector<double> user_outputs{};
+
+        for(int j = 0; j < layer_sizes[0]; j++)
+        {
+            std::cout << "Input " << j+1 << ": ";
+            std::cin >> user_in;
+            user_inputs.push_back(user_in);
+        }
+
+        for(int j = 0; j < layer_sizes[layer_sizes.size() - 1]; j++)
+        {
+            std::cout << "Output " << j+1 << ": ";
+            std::cin >> user_in;
+            user_outputs.push_back(user_in);
+        }
+        ds.push_back(new TrainingData(user_inputs, user_outputs));
+    }
+
+    nn->train(ds, 0.01);
+
+    while(true)
+    {
+        std::vector<double> inputs{};
+        std::cout << "Enter inputs (e.g. 1 0 0 1): ";
+        for(int i = 0; i < layer_sizes[0]; i++)
+        {
+            double val{};
+            std::cin >> val;
+            inputs.push_back(val);
+        }
+        if(!std::cin) // EOF (e.g. Ctrl-D) ends the session so the cleanup below is reached
+            break;
+        std::cin.ignore(1000, '\n');
+        std::cout << std::fixed << std::setprecision(4) << "Predicted output: " << nn->run(inputs) << std::endl << std::endl;
+    }
+
+    delete nn;
+
+    for(TrainingData* td : ds)
+    {
+        delete td;
+    }
+
+    return 0;
+}
\ No newline at end of file
--
GitLab
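
A minimal non-interactive usage sketch (an addition for illustration, not part of the patch): it assumes the files above are on the include path, and since every header #includes its corresponding .cc, this single translation unit is the whole build (e.g. g++ -std=c++11 sketch.cc -o sketch). The AND-gate dataset and the 2-4-1 topology are illustrative choices, not taken from the patch.

    // sketch.cc -- trains the patch's network on an AND gate and queries it.
    #include <cstdlib>
    #include <ctime>
    #include <iostream>
    #include "NeuralNetwork.h"
    #include "TrainingData.h"

    int main()
    {
        srand(time(0)); // weights and biases are drawn with rand()

        // 2 inputs, one hidden layer of 4 ReLU neurons, 1 sigmoid output.
        NeuralNetwork nn({2, 4, 1});

        // Toy dataset: logical AND.
        Dataset ds{new TrainingData({0, 0}, {0}),
                   new TrainingData({0, 1}, {0}),
                   new TrainingData({1, 0}, {0}),
                   new TrainingData({1, 1}, {1})};

        nn.train(ds, 0.01); // blocks until the average loss drops below 0.00001

        std::cout << nn.run({1, 1}) << std::endl; // should approach 1.0
        std::cout << nn.run({0, 1}) << std::endl; // should approach 0.0

        for(TrainingData* td : ds)
            delete td;
        return 0;
    }

Note that train() re-randomizes the weights whenever the loss diverges, so its run time varies from seed to seed.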