diff --git a/ActivationFunction.cc b/ActivationFunction.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0310b469a4f7e0066b5e56fab80df8d1df82aa0e
--- /dev/null
+++ b/ActivationFunction.cc
@@ -0,0 +1,43 @@
+double ActivationFunction::Linear(double v)
+{
+    // Clamped linear: identity on [0, 1], saturating outside that range.
+    if(v > 1)
+        return 1.0;
+
+    if(v < 0)
+        return 0.0;
+
+    return v;
+}
+
+double ActivationFunction::ReLU(double v)
+{
+    return std::max(0.0, v);
+}
+
+double ActivationFunction::Sigmoid(double v)
+{
+    return 1.0 / (1.0 + std::exp(-v));
+}
+
+double ActivationFunction::Linear_der(double v)
+{
+    // Derivative of the clamped linear above: zero in the saturated regions.
+    if(v > 1 || v < 0)
+        return 0.0;
+
+    return 1.0;
+}
+
+double ActivationFunction::ReLU_der(double v)
+{
+    return v > 0.0 ? 1.0 : 0.0;
+}
+
+double ActivationFunction::Sigmoid_der(double v)
+{
+    // Neuron::getDerivative() passes the pre-activation z, so evaluate the
+    // sigmoid first: d/dz sigma(z) = sigma(z) * (1 - sigma(z)).
+    double s = Sigmoid(v);
+    return s * (1.0 - s);
+}
\ No newline at end of file
diff --git a/ActivationFunction.h b/ActivationFunction.h
new file mode 100644
index 0000000000000000000000000000000000000000..4077c9b2a4436e4a915066d5a4a07d18673f99c0
--- /dev/null
+++ b/ActivationFunction.h
@@ -0,0 +1,26 @@
+#ifndef ACTIVATIONFUNCTION_H
+#define ACTIVATIONFUNCTION_H
+
+#include <algorithm> // std::max
+#include <cmath>     // std::exp
+#include <functional>
+
+namespace ActivationFunction
+{
+
+    typedef std::function<double(double)> ActFunc;
+
+    double Linear(double);
+    double ReLU(double);
+    double Sigmoid(double);
+
+    double Linear_der(double);
+    double ReLU_der(double);
+    double Sigmoid_der(double);
+
+}
+
+// Header-only build: the implementation file is included directly, so no
+// separate translation unit is needed.
+#include "ActivationFunction.cc"
+
+#endif // ACTIVATIONFUNCTION_H
\ No newline at end of file
diff --git a/Layer.cc b/Layer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..10065c5a92b5b5c9dd0a86453fce499f3b526566
--- /dev/null
+++ b/Layer.cc
@@ -0,0 +1,152 @@
+Layer::Layer(int n_neurons, int prev_n_neurons, std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : neurons{}, weights{}
+{
+    for(int i = 0; i < n_neurons; i++)
+    {
+        this->neurons.push_back(new Neuron(act_func, deriv_func));
+    }
+
+    if(n_neurons > 0 && prev_n_neurons > 0)
+    {
+        this->weights = new Matrix(n_neurons, prev_n_neurons);
+        this->weights->randomize();
+    }
+    else
+    {
+        // Input layer: a single identity weight per neuron and no bias, so
+        // feedForward passes the raw inputs through unchanged.
+        this->weights = new Matrix(n_neurons, 1);
+        for(Neuron* n : this->neurons)
+        {
+            n->setBias(0.0);
+        }
+    }
+}
+
+Layer::~Layer()
+{
+    for(Neuron* n : this->neurons)
+    {
+        delete n;
+    }
+    delete this->weights;
+}
+
+void Layer::setVals(std::vector<double> vals)
+{
+    for(std::size_t i = 0; i < this->neurons.size(); i++)
+    {
+        this->neurons[i]->setVal(vals[i]);
+    }
+}
+
+Matrix* Layer::getWeights() const
+{
+    return this->weights;
+}
+
+void Layer::randomizeVals()
+{
+    this->weights->randomize();
+    for(Neuron* n : this->neurons)
+    {
+        n->randomizeBias();
+    }
+}
+
+std::vector<double> Layer::feedForward(std::vector<double> inputs)
+{
+    std::vector<double> result{};
+
+    if(this->weights->getCols() == 1) // cols == 1 => input layer
+    {
+        for(std::size_t i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate({inputs[i]}, this->weights->getRow(i)));
+        }
+    }
+    else
+    {
+        for(std::size_t i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate(inputs, this->weights->getRow(i)));
+        }
+    }
+    return result;
+}
+
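+// Gradient bookkeeping for one training sample. For the binary cross-entropy
+// loss L = -(y*log(y^) + (1-y)*log(1-y^)) with a sigmoid output, the chain
+// rule gives dL/dw = dL/dy^ * dy^/dz * dz/dw; the loss_yhat, yhat_z and
+// z_w/z_a factors below compute those terms one at a time.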
+void Layer::backpropagate(double y_expected, double y_predicted, double loss, double learning_rate, bool output_layer, Layer* next_layer)
+{
+    if(output_layer)
+    {
+        // dL/dy^ for binary cross-entropy; assumes a single output neuron.
+        double loss_yhat = ((1 - y_expected) / (1 - y_predicted)) - (y_expected / y_predicted); // (1 - y) / (1 - y^) - (y / y^)
+        double yhat_z = y_predicted * (1 - y_predicted); // dy^/dz for the sigmoid output
+        std::vector<double> z_w = this->neurons[0]->getInputs(); // dz/dw = incoming activations
+
+        std::vector<double> gradient{};
+        double bias_delta = learning_rate * loss_yhat * yhat_z;
+        for(double d : z_w)
+        {
+            gradient.push_back(loss_yhat * yhat_z * d);
+        }
+
+        this->neurons[0]->adjustBias(bias_delta);
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(std::size_t r = 0; r < w.size(); r++)
+        {
+            for(std::size_t c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+    else
+    {
+        double loss_yhat = (1 - y_expected) / (1 - y_predicted) - (y_expected / y_predicted); // (1 - y) / (1 - y^) - (y / y^)
+        double yhat_z = y_predicted * (1 - y_predicted);
+        // Weights of the downstream layer give dz_next/da; row 0 again
+        // assumes a single output neuron.
+        std::vector<double> z_a = next_layer->getWeights()->getVals()[0];
+        std::vector<double> a_z{}; // da/dz: activation derivatives of this layer
+        for(Neuron* n : this->neurons)
+        {
+            a_z.push_back(n->getDerivative());
+        }
+
+        std::vector<std::vector<double>> gradient{};
+        for(int r = 0; r < this->weights->getRows(); r++)
+        {
+            std::vector<double> grad_row{};
+            double loss_z = loss_yhat * yhat_z * z_a[r] * a_z[r];
+            double bias_delta = learning_rate * loss_z;
+            this->neurons[r]->adjustBias(bias_delta);
+            std::vector<double> inputs = this->neurons[r]->getInputs();
+            for(int c = 0; c < this->weights->getCols(); c++)
+            {
+                grad_row.push_back(loss_z * inputs[c]);
+            }
+            gradient.push_back(grad_row);
+        }
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(std::size_t r = 0; r < w.size(); r++)
+        {
+            for(std::size_t c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[r][c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+}
diff --git a/Layer.h b/Layer.h
new file mode 100644
index 0000000000000000000000000000000000000000..288f2e172bc09732de6ddcd0691658df6ca3e92d
--- /dev/null
+++ b/Layer.h
@@ -0,0 +1,33 @@
+#ifndef LAYER_H
+#define LAYER_H
+
+#include <iostream>
+#include <vector>
+#include <functional>
+#include "Neuron.h"
+#include "Matrix.h"
+
+class Layer
+{
+public:
+    Layer(int, int, std::function<double(double)>, std::function<double(double)>);
+    ~Layer();
+
+    void setVals(std::vector<double>);
+
+    Matrix* getWeights() const;
+    void randomizeVals();
+
+    std::vector<double> feedForward(std::vector<double>);
+    // (expected, predicted, loss, learning rate, is output layer, next layer)
+    void backpropagate(double, double, double, double, bool, Layer*);
+
+private:
+    std::vector<Neuron*> neurons;
+    Matrix* weights;
+};
+
+#include "Layer.cc"
+
+#endif // LAYER_H
+
diff --git a/Matrix.cc b/Matrix.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9437142ea3710efaffbfbd4e5b82de7819e3ed30
--- /dev/null
+++ b/Matrix.cc
@@ -0,0 +1,92 @@
+Matrix::Matrix(int rows, int cols)
+    : rows{rows}, cols{cols}, vals{}
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        std::vector<double> row;
+        for(int c = 0; c < this->cols; c++)
+        {
+            row.push_back(1.0);
+        }
+        this->vals.push_back(row);
+    }
+}
+
+Matrix::~Matrix()
+{}
+
+std::vector<std::vector<double>> Matrix::getVals() const
+{
+    return this->vals;
+}
+
+void Matrix::setVals(std::vector<std::vector<double>> v)
+{
+    this->vals = v;
+}
+
+std::vector<double> Matrix::getRow(int index) const
+{
+    return this->vals[index];
+}
+
+int Matrix::getRows() const
+{
+    return this->rows;
+}
+
+int Matrix::getCols() const
+{
+    return this->cols;
+}
+
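+// Fill the matrix with uniform pseudo-random values in [0, 1) at a
+// resolution of 1e-4. rand() keeps the dependency footprint small; the
+// <random> facilities would be the more modern choice.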
+void Matrix::randomize()
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        for(int c = 0; c < this->cols; c++)
+        {
+            this->vals[r][c] = (rand() % 10000) / 10000.0;
+        }
+    }
+}
+
+void Matrix::transpose()
+{
+    std::vector<std::vector<double>> new_vals{};
+
+    // Build row c of the transpose from column c of the original.
+    for(int c = 0; c < this->cols; c++)
+    {
+        std::vector<double> row;
+        for(int r = 0; r < this->rows; r++)
+        {
+            row.push_back(this->vals[r][c]);
+        }
+        new_vals.push_back(row);
+    }
+
+    this->vals = new_vals;
+    std::swap(this->rows, this->cols);
+}
+
+std::ostream& operator<<(std::ostream& os, Matrix* m)
+{
+    std::vector<std::vector<double>> vals = m->getVals();
+
+    os << "Matrix: " << m->getRows() << " x " << m->getCols() << std::endl;
+
+    for(std::vector<double> v : vals)
+    {
+        for(double d : v)
+        {
+            os << std::fixed << std::setprecision(4) << "[" << d << "] ";
+        }
+        os << std::endl;
+    }
+
+    return os;
+}
\ No newline at end of file
diff --git a/Matrix.h b/Matrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..1dafe85e4eb6256d4f331f71b117a1f9c4333f09
--- /dev/null
+++ b/Matrix.h
@@ -0,0 +1,36 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib> // rand
+#include <utility> // std::swap
+
+class Matrix
+{
+public:
+    Matrix(int, int);
+    ~Matrix();
+
+    std::vector<std::vector<double>> getVals() const;
+    void setVals(std::vector<std::vector<double>>);
+    std::vector<double> getRow(int) const;
+
+    int getRows() const;
+    int getCols() const;
+
+    void randomize();
+    void transpose();
+
+private:
+    int rows;
+    int cols;
+    std::vector<std::vector<double>> vals;
+};
+
+std::ostream& operator<<(std::ostream&, Matrix*);
+
+#include "Matrix.cc"
+
+#endif // MATRIX_H
\ No newline at end of file
diff --git a/NeuralNetwork.cc b/NeuralNetwork.cc
new file mode 100644
index 0000000000000000000000000000000000000000..21dceb84ff04a15f8a500833b387b2fa00df6847
--- /dev/null
+++ b/NeuralNetwork.cc
@@ -0,0 +1,113 @@
+NeuralNetwork::NeuralNetwork(std::vector<int> layer_sizes)
+    : layers{}
+{
+    auto it = layer_sizes.begin();
+
+    while(it != layer_sizes.end())
+    {
+        // Hidden layers use ReLU; the input layer is (clamped) linear and the
+        // output layer is sigmoid, matching the cross-entropy loss in train().
+        std::function<double(double)> act_func = &ActivationFunction::ReLU;
+        std::function<double(double)> deriv_func = &ActivationFunction::ReLU_der;
+
+        if(it == layer_sizes.begin())
+        {
+            act_func = &ActivationFunction::Linear;
+            deriv_func = &ActivationFunction::Linear_der;
+        }
+        else if(it == layer_sizes.end() - 1)
+        {
+            act_func = &ActivationFunction::Sigmoid;
+            deriv_func = &ActivationFunction::Sigmoid_der;
+        }
+
+        if(it == layer_sizes.begin())
+            this->layers.push_back(new Layer(*it, 0, act_func, deriv_func));
+        else
+            this->layers.push_back(new Layer(*it, *(it - 1), act_func, deriv_func));
+        it++;
+    }
+}
+
+NeuralNetwork::~NeuralNetwork()
+{
+    for(Layer* l : this->layers)
+    {
+        delete l;
+    }
+}
+
+std::vector<Layer*> NeuralNetwork::getLayers() const
+{
+    return this->layers;
+}
+
+double NeuralNetwork::run(std::vector<double> inputs)
+{
+    std::vector<double> next_inputs = inputs;
+
+    for(auto it = this->layers.begin(); it != this->layers.end(); it++)
+    {
+        next_inputs = (*it)->feedForward(next_inputs);
+    }
+
+    // The network is assumed to have a single output neuron.
+    return next_inputs[0];
+}
+
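+// Online (per-sample) gradient descent on the binary cross-entropy loss. The
+// loop tracks the best average loss seen so far and stops once it drops below
+// 1e-5; stalled or diverging runs are re-randomized rather than tuned.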
+void NeuralNetwork::train(Dataset ds, double learning_rate)
+{
+    std::cout << "----- TRAINING -----" << std::endl;
+    double lowest_loss{9999.0};
+
+    while(lowest_loss > 0.00001)
+    {
+        double total_loss{};
+        double samples{0};
+
+        double y_expected{};
+        double y_predicted{};
+
+        // The step size handed to backpropagate() is 10x the previous
+        // epoch's average loss, seeded with learning_rate, so steps shrink
+        // as the network converges.
+        double average_loss{learning_rate};
+        for(TrainingData* td : ds)
+        {
+            std::vector<double> inputs = td->getInputs();
+            y_expected = td->getOutputs()[0];
+            y_predicted = this->run(inputs);
+            double loss = y_expected * std::log(y_predicted) + (1 - y_expected) * std::log(1.0 - y_predicted); // y * log(y^) + (1 - y) * log(1 - y^)
+
+            total_loss += loss;
+            samples++;
+
+            // Walk backwards from the output layer. The output layer has no
+            // downstream layer, so pass nullptr instead of dereferencing end().
+            for(auto it = this->layers.end() - 1; it != this->layers.begin(); it--)
+            {
+                bool is_output = (it == this->layers.end() - 1);
+                (*it)->backpropagate(y_expected, y_predicted, -loss, average_loss * 10, is_output, is_output ? nullptr : *(it + 1));
+            }
+        }
+
+        average_loss = -(1.0 / (2 * samples)) * total_loss;
+
+        // Random restart: if the loss got noticeably worse while still high,
+        // re-randomize every trainable layer and start over.
+        if(average_loss > lowest_loss + 0.02 && average_loss > 0.1)
+        {
+            for(std::size_t l = 1; l < this->layers.size(); l++)
+            {
+                this->layers[l]->randomizeVals();
+            }
+            lowest_loss = 9999.0;
+        }
+
+        if(std::fabs(average_loss) < lowest_loss)
+            lowest_loss = average_loss;
+    }
+}
\ No newline at end of file
diff --git a/NeuralNetwork.h b/NeuralNetwork.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3c66ee4af829bf6c28cff96ca872ff19bac2e11
--- /dev/null
+++ b/NeuralNetwork.h
@@ -0,0 +1,32 @@
+#ifndef NEURALNETWORK_H
+#define NEURALNETWORK_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cmath>
+
+#include "Layer.h"
+#include "ActivationFunction.h"
+#include "Matrix.h"
+#include "TrainingData.h"
+
+class NeuralNetwork
+{
+public:
+    NeuralNetwork(std::vector<int>);
+    ~NeuralNetwork();
+
+    std::vector<Layer*> getLayers() const;
+
+    double run(std::vector<double>);
+
+    void train(Dataset, double);
+
+private:
+    std::vector<Layer*> layers;
+};
+
+#include "NeuralNetwork.cc"
+
+#endif // NEURALNETWORK_H
diff --git a/Neuron.cc b/Neuron.cc
new file mode 100644
index 0000000000000000000000000000000000000000..09b7997478cc4c30136c043d80371107b14f658c
--- /dev/null
+++ b/Neuron.cc
@@ -0,0 +1,66 @@
+Neuron::Neuron(std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : z_val{}, a_val{}, bias{(rand() % 10000) / 10000.0}, activation_function{act_func}, derivative_function{deriv_func}
+{}
+
+Neuron::~Neuron()
+{}
+
+double Neuron::getVal() const
+{
+    return this->a_val;
+}
+
+void Neuron::setVal(double v)
+{
+    this->a_val = v;
+}
+
+double Neuron::getDerivative() const
+{
+    // Derivative of the activation at the stored pre-activation value.
+    return this->derivative_function(this->z_val);
+}
+
+std::vector<double> Neuron::getInputs() const
+{
+    return this->inputs;
+}
+
+void Neuron::setBias(double v)
+{
+    this->bias = v;
+}
+
+void Neuron::adjustBias(double delta)
+{
+    this->bias -= delta;
+}
+
+void Neuron::randomizeBias()
+{
+    this->bias = (rand() % 10000) / 10000.0;
+}
+
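+// Forward pass for a single neuron: z = w . x + b, a = f(z). Both z and the
+// raw inputs are cached so the backward pass can reuse them.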
"Matrix.h" + +class Neuron +{ +public: + Neuron(std::function<double(double)>, std::function<double(double)>); + ~Neuron(); + + double getVal() const; + void setVal(double); + + double getDerivative() const; + + std::vector<double> getInputs() const; + + void setBias(double); + void adjustBias(double); + void randomizeBias(); + + double calculate(std::vector<double>, std::vector<double>); + +private: + double z_val; // weighted val + bias + double a_val; // activated z_val + std::vector<double> inputs; + double bias; + std::function<double(double)> activation_function; + std::function<double(double)> derivative_function; + +}; + +#include "Neuron.cc" + +#endif // NEURON_H \ No newline at end of file diff --git a/TrainingData.cc b/TrainingData.cc new file mode 100644 index 0000000000000000000000000000000000000000..f52f208b72c29da2545a1842f2aa341001ef0179 --- /dev/null +++ b/TrainingData.cc @@ -0,0 +1,13 @@ +TrainingData::TrainingData(std::vector<double> in, std::vector<double> out) + : inputs{in}, outputs{out} +{} + +std::vector<double> TrainingData::getInputs() const +{ + return this->inputs; +} + +std::vector<double> TrainingData::getOutputs() const +{ + return this->outputs; +} \ No newline at end of file diff --git a/TrainingData.h b/TrainingData.h new file mode 100644 index 0000000000000000000000000000000000000000..5ee4efd668fd8fe03b895a635daf41126cc51530 --- /dev/null +++ b/TrainingData.h @@ -0,0 +1,25 @@ +#ifndef TRAININGDATA_H +#define TRAININGDATA_H + +#include <vector> +#include <vector> + +class TrainingData +{ +public: + TrainingData(std::vector<double>, std::vector<double>); + + std::vector<double> getInputs() const; + std::vector<double> getOutputs() const; + +private: + std::vector<double> inputs; + std::vector<double> outputs; + +}; + +typedef std::vector<TrainingData*> Dataset; + +#include "TrainingData.cc" + +#endif // TRAININGDATA_H \ No newline at end of file diff --git a/main.cc b/main.cc new file mode 100644 index 0000000000000000000000000000000000000000..acabe62f37803c778d7693cdb45be4311bb6ef55 --- /dev/null +++ b/main.cc @@ -0,0 +1,84 @@ +#include <iostream> +#include <iomanip> +#include <time.h> +#include <string> + +#include "NeuralNetwork.h" +#include "TrainingData.h" + +int main(int argc, char* argv[]) +{ + + if(argc < 3) + { + std::cerr << "ERROR: Enter sizes of atleast 2 layers!" << std::endl; + std::cerr << "USAGE: " << argv[0] << " 2 4 1" << std::endl; + return 0; + } + + std::vector<int> layer_sizes{}; + + for(int i = 1; i < argc; i++) + { + layer_sizes.push_back(atoi(argv[i])); + } + + std::ios_base::sync_with_stdio(0); + srand(time(0)); + + NeuralNetwork* nn = new NeuralNetwork(layer_sizes); + + double user_in{}; + + Dataset ds{}; + int n_samples; + + std::cout << "Enter number of samples for training dataset: "; + std::cin >> n_samples; + for(int i = 0; i < n_samples; i++) + { + std::cout << "Entering trainig data!" 
+    nn->train(ds, 0.01);
+
+    while(true)
+    {
+        std::vector<double> inputs{};
+        std::cout << "Enter inputs (ie: 1 0 0 1): ";
+        bool valid_input = true;
+        for(int i = 0; i < layer_sizes[0]; i++)
+        {
+            double val{};
+            if(!(std::cin >> val))
+            {
+                valid_input = false;
+                break;
+            }
+            inputs.push_back(val);
+        }
+        if(!valid_input)
+            break; // EOF or non-numeric input: stop and clean up
+        std::cin.ignore(1000, '\n');
+        std::cout << std::fixed << std::setprecision(4) << "Predicted output: " << nn->run(inputs) << std::endl << std::endl;
+    }
+
+    delete nn;
+
+    for(TrainingData* td : ds)
+    {
+        delete td;
+    }
+
+    return 0;
+}
\ No newline at end of file