diff --git a/ActivationFunction.cc b/ActivationFunction.cc
new file mode 100644
index 0000000000000000000000000000000000000000..0310b469a4f7e0066b5e56fab80df8d1df82aa0e
--- /dev/null
+++ b/ActivationFunction.cc
@@ -0,0 +1,38 @@
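+// Activation functions and their derivatives. This file is textually
+// included at the bottom of ActivationFunction.h (the header-only,
+// unity-build style used throughout this repo), so it relies on that
+// header's includes.
+
+// NOTE: despite its name, Linear clamps its input to [0, 1], and
+// Linear_der returns 1 everywhere, ignoring the clamped regions; the
+// network only uses this pair for the input layer.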
+double ActivationFunction::Linear(double v)
+{
+    if(v > 1)
+        return 1.0;
+
+    if(v < 0)
+        return 0.0;
+
+    return v;
+}
+
+double ActivationFunction::ReLU(double v)
+{
+    return std::max(0.0, v);
+}
+
+double ActivationFunction::Sigmoid(double v)
+{
+    return 1.0 / (1.0 + std::exp(-v));
+}
+
+double ActivationFunction::Linear_der(double v)
+{
+    return 1.0;
+}
+
+double ActivationFunction::ReLU_der(double v)
+{
+    return v > 0.0 ? 1.0 : 0.0;
+}
+
+double ActivationFunction::Sigmoid_der(double v)
+{
+    // sigma'(v) = sigma(v) * (1 - sigma(v)); v is the pre-activation,
+    // which is what Neuron::getDerivative passes in.
+    double s = Sigmoid(v);
+    return s * (1 - s);
+}
\ No newline at end of file
diff --git a/ActivationFunction.h b/ActivationFunction.h
new file mode 100644
index 0000000000000000000000000000000000000000..4077c9b2a4436e4a915066d5a4a07d18673f99c0
--- /dev/null
+++ b/ActivationFunction.h
@@ -0,0 +1,21 @@
+#ifndef ACTIVATIONFUNCTION_H
+#define ACTIVATIONFUNCTION_H
+
+#include <functional>
+#include <cmath>        // std::exp
+#include <algorithm>    // std::max
+namespace ActivationFunction
+{
+
+    typedef std::function<double(double)> ActFunc;
+
+    double Linear(double);
+    double ReLU(double);
+    double Sigmoid(double);
+
+    double Linear_der(double);
+    double ReLU_der(double);
+    double Sigmoid_der(double);
+
+}
+
+#include "ActivationFunction.cc"
+
+#endif  // ACTIVATIONFUNCTION_H
\ No newline at end of file
diff --git a/Layer.cc b/Layer.cc
new file mode 100644
index 0000000000000000000000000000000000000000..10065c5a92b5b5c9dd0a86453fce499f3b526566
--- /dev/null
+++ b/Layer.cc
@@ -0,0 +1,145 @@
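+// A Layer owns its neurons and the weight matrix connecting it to the
+// previous layer (one row per neuron, one column per input). The input
+// layer gets a single-column dummy matrix of ones and zero biases, so each
+// input reaches its neuron unweighted.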
+Layer::Layer(int n_neurons, int prev_n_neurons, std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : neurons{}, weights{}
+{
+    for(int i = 0; i < n_neurons; i++)
+    {
+        this->neurons.push_back(new Neuron(act_func, deriv_func));
+    }
+
+    if(n_neurons > 0 && prev_n_neurons > 0)
+    {
+        this->weights = new Matrix(n_neurons, prev_n_neurons);
+        this->weights->randomize();
+    }
+    else
+    {
+        this->weights = new Matrix(n_neurons, 1);
+        for(Neuron* n : this->neurons)
+        {
+            n->setBias(0.0);
+        }
+    }
+}
+
+Layer::~Layer()
+{
+    for(Neuron* n : this->neurons)
+    {
+        delete n;
+    }
+    delete this->weights;
+}
+
+void Layer::setVals(std::vector<double> vals)
+{
+    for(int i = 0; i < this->neurons.size(); i++)
+    {
+        this->neurons[i]->setVal(vals[i]);
+    }
+}
+
+Matrix* Layer::getWeights() const
+{
+    return this->weights;
+}
+
+void Layer::randomizeVals()
+{
+    this->weights->randomize();
+    for(Neuron* n : this->neurons)
+    {
+        n->randomizeBias();
+    }
+}
+
+std::vector<double> Layer::feedForward(std::vector<double> inputs)
+{
+    std::vector<double> result{};
+
+    if(this->weights->getCols() == 1) // cols == 1 => Input layer
+    {
+        for(int i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate({inputs[i]}, this->weights->getRow(i)));
+        }
+    }
+    else
+    {
+        for(int i = 0; i < this->neurons.size(); i++)
+        {
+            result.push_back(this->neurons[i]->calculate(inputs, this->weights->getRow(i)));
+        }
+    }
+    return result;
+}
+
+void Layer::backpropagate(double y_expected, double y_predicted, double loss, double learning_rate, bool output_layer, Layer* prev_layer)
+{
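+    // y_expected / y_predicted come from the network's forward pass; the
+    // loss argument is currently unused. prev_layer is the *next* layer in
+    // forward order, i.e. the previous one in backpropagation order.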
+    if(output_layer)
+    {
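+        // Chain rule at the sigmoid output with binary cross-entropy:
+        // dL/dz = (dL/dyhat) * (dyhat/dz), which simplifies to (yhat - y).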
+        double loss_yhat = ((1 - y_expected) / (1 - y_predicted)) - (y_expected / y_predicted); // (1 - y) / (1 - y^) - (y / y^)
+        double yhat_z = y_predicted * (1 - y_predicted);
+        std::vector<double> z_w = this->neurons[0]->getInputs();
+
+        std::vector<double> gradient{};
+        double bias_delta = learning_rate * loss_yhat * yhat_z;
+        for(double d : z_w)
+        {
+            gradient.push_back(loss_yhat * yhat_z * d);
+        }
+
+        this->neurons[0]->adjustBias(bias_delta);
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(int r = 0; r < w.size(); r++)
+        {
+            for(int c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+    else
+    { 
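+        // Hidden layer: the error signal is routed back through the output
+        // neuron, so only row 0 of the next layer's weights is used. This
+        // (like train()) assumes a single-neuron output layer.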
+        double loss_yhat = (1 - y_expected) / (1 - y_predicted) - (y_expected / y_predicted); // (1 - y) / (1 - y^) - (y / y^)
+        double yhat_z = y_predicted * (1 - y_predicted);
+        std::vector<double> z_a = prev_layer->getWeights()->getVals()[0];
+        std::vector<double> a_z{};
+        for(Neuron* n : this->neurons)
+        {
+            a_z.push_back(n->getDerivative());
+        }
+
+        std::vector<std::vector<double>> gradient{};
+        for(int r = 0; r < this->weights->getRows(); r++)
+        {
+            std::vector<double> grad_row{};
+            double loss_z = loss_yhat * yhat_z * z_a[r] * a_z[r];
+            double bias_delta = learning_rate * loss_z;
+            this->neurons[r]->adjustBias(bias_delta);
+            std::vector<double> inputs = this->neurons[r]->getInputs();
+            for(int c = 0; c < this->weights->getCols(); c++)
+            {
+                grad_row.push_back(loss_z * inputs[c]);
+            }
+            gradient.push_back(grad_row);
+        }
+
+        std::vector<std::vector<double>> w = this->weights->getVals();
+
+        for(int r = 0; r < w.size(); r++)
+        {
+            for(int c = 0; c < w[r].size(); c++)
+            {
+                w[r][c] -= learning_rate * gradient[r][c];
+            }
+        }
+
+        this->weights->setVals(w);
+    }
+}
diff --git a/Layer.h b/Layer.h
new file mode 100644
index 0000000000000000000000000000000000000000..288f2e172bc09732de6ddcd0691658df6ca3e92d
--- /dev/null
+++ b/Layer.h
@@ -0,0 +1,32 @@
+#ifndef LAYER_H
+#define LAYER_H
+
+#include <iostream>
+#include <vector>
+#include <functional>
+#include "Neuron.h"
+#include "Matrix.h"
+
+class Layer
+{
+public:
+    Layer(int n_neurons, int prev_n_neurons, std::function<double(double)> act_func, std::function<double(double)> deriv_func);
+    ~Layer();
+
+    void setVals(std::vector<double>);
+
+    Matrix* getWeights() const;
+    void randomizeVals();
+
+    std::vector<double> feedForward(std::vector<double>);
+    void backpropagate(double y_expected, double y_predicted, double loss, double learning_rate, bool output_layer, Layer* prev_layer);
+
+private:
+    std::vector<Neuron*> neurons;
+    Matrix* weights;
+};
+
+#include "Layer.cc"
+
+#endif  // LAYER_H
+
diff --git a/Matrix.cc b/Matrix.cc
new file mode 100644
index 0000000000000000000000000000000000000000..9437142ea3710efaffbfbd4e5b82de7819e3ed30
--- /dev/null
+++ b/Matrix.cc
@@ -0,0 +1,91 @@
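+// Minimal dense matrix of doubles, used to store a layer's weights.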
+Matrix::Matrix(int rows, int cols)
+    : rows{rows}, cols{cols}, vals{}
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        std::vector<double> row;
+        for(int c = 0; c < this->cols; c++)
+        {
+            row.push_back(1.0);
+        }
+        this->vals.push_back(row);
+    }
+}
+
+Matrix::~Matrix()
+{}
+
+std::vector<std::vector<double>> Matrix::getVals() const
+{
+    return this->vals;
+}
+
+void Matrix::setVals(std::vector<std::vector<double>> v)
+{
+    this->vals = v;
+}
+
+std::vector<double> Matrix::getRow(int index) const
+{
+    return this->vals[index];
+}
+
+int Matrix::getRows() const
+{
+    return this->rows;
+}
+
+int Matrix::getCols() const
+{
+    return this->cols;
+}
+
+void Matrix::randomize()
+{
+    for(int r = 0; r < this->rows; r++)
+    {
+        for(int c = 0; c < this->cols; c++)
+        {
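+            // pseudo-random value in [0, 1), in steps of 1/10000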
+            this->vals[r][c] = (rand() % 10000) / 10000.0;
+        }
+    }
+}
+
+void Matrix::transpose()
+{
+    std::vector<std::vector<double>> new_vals{};
+
+    for(int c = 0; c < this->cols; c++)
+    {
+        std::vector<double> row;
+        for(int r = 0; r < this->rows; r++)
+        {
+            row.push_back(this->vals[r][c]);
+        }
+        new_vals.push_back(row);
+    }
+
+    this->vals = new_vals;
+    std::swap(this->rows, this->cols);
+}
+
+std::ostream& operator<<(std::ostream& os, Matrix* m)
+{
+    std::vector<std::vector<double>> vals = m->getVals();
+
+    os << "Matrix: " << m->getRows() << " x " << m->getCols() << std::endl;
+
+    for(std::vector<double> v : vals)
+    {
+        for(double d : v)
+        {
+            os << std::fixed << std::setprecision(4) << "[" << d << "] ";
+        }
+        os << std::endl;
+    }
+
+    return os;
+}
\ No newline at end of file
diff --git a/Matrix.h b/Matrix.h
new file mode 100644
index 0000000000000000000000000000000000000000..1dafe85e4eb6256d4f331f71b117a1f9c4333f09
--- /dev/null
+++ b/Matrix.h
@@ -0,0 +1,34 @@
+#ifndef MATRIX_H
+#define MATRIX_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <cstdlib>      // rand
+#include <utility>      // std::swap
+
+class Matrix
+{
+public:
+    Matrix(int, int);
+    ~Matrix();
+
+    std::vector<std::vector<double>> getVals() const;
+    void setVals(std::vector<std::vector<double>>);
+    std::vector<double> getRow(int) const;
+
+    int getRows() const;
+    int getCols() const;
+
+    void randomize();
+    void transpose();
+
+private:
+    int rows;
+    int cols;
+    std::vector<std::vector<double>> vals;
+};
+
+std::ostream& operator<<(std::ostream&, Matrix*);
+
+#include "Matrix.cc"
+
+#endif  // MATRIX_H
\ No newline at end of file
diff --git a/NeuralNetwork.cc b/NeuralNetwork.cc
new file mode 100644
index 0000000000000000000000000000000000000000..21dceb84ff04a15f8a500833b387b2fa00df6847
--- /dev/null
+++ b/NeuralNetwork.cc
@@ -0,0 +1,121 @@
+NeuralNetwork::NeuralNetwork(std::vector<int> layer_sizes)
+    : layers{}
+{
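+    // Input layer: clamped pass-through; hidden layers: ReLU; output layer:
+    // sigmoid (the binary cross-entropy loss in train() expects a single
+    // sigmoid output).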
+    auto it = layer_sizes.begin();
+
+    while(it != layer_sizes.end())
+    {
+        std::function<double(double)> act_func = &ActivationFunction::ReLU;
+        std::function<double(double)> deriv_func = &ActivationFunction::ReLU_der;
+
+        if(it == layer_sizes.begin())
+        {
+            act_func = &ActivationFunction::Linear;
+            deriv_func = &ActivationFunction::Linear_der;
+        }
+        else if(it == layer_sizes.end() - 1)
+        {
+            act_func = &ActivationFunction::Sigmoid;
+            deriv_func = &ActivationFunction::Sigmoid_der;
+        }
+
+        if(it == layer_sizes.begin())
+            this->layers.push_back(new Layer(*it, 0, act_func, deriv_func));
+        else
+            this->layers.push_back(new Layer(*it, *(it - 1), act_func, deriv_func));
+        it++;
+    }
+}
+
+NeuralNetwork::~NeuralNetwork()
+{
+    for(Layer* l : this->layers)
+    {
+        delete l;
+    }
+}
+
+std::vector<Layer*> NeuralNetwork::getLayers() const
+{
+    return this->layers;
+}
+
+double NeuralNetwork::run(std::vector<double> inputs)
+{
+    std::vector<double> next_inputs = inputs;
+
+    auto it = this->layers.begin();
+    for(; it != this->layers.end(); it++)
+    {
+        next_inputs = (*it)->feedForward(next_inputs);
+    }
+
+    return next_inputs[0]; // the network is assumed to end in a single output neuron
+}
+
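+// Online (per-sample) gradient descent with binary cross-entropy. Training
+// runs until the average loss drops below 1e-5, re-randomizing the weights
+// whenever the loss regresses noticeably; with an unlucky start or an
+// unlearnable dataset this may loop for a long time.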
+void NeuralNetwork::train(Dataset ds, double learning_rate)
+{
+    std::cout << "----- TRAINING -----" << std::endl;
+    double lowest_loss{9999.0};
+
+    while(lowest_loss > 0.00001)
+    {
+        double total_loss{};
+        double samples{0};
+
+        double y_expected{};
+        double y_predicted{};
+        
+        double average_loss{};
+        for(TrainingData* td : ds)
+        {
+            std::vector<double> inputs = td->getInputs();
+            y_expected = td->getOutputs()[0];
+            y_predicted = this->run(inputs);
+            double loss = y_expected * log(y_predicted) + (1 - y_expected) * log(1.0 - y_predicted); // y * log(y^) + (1 - y) * log(1 - y^)
+
+            total_loss += loss;
+            samples++;
+
+            // Walk the layers from the output back toward the input; the
+            // input layer has no trainable weights, and the output layer
+            // has no "previous" (next-in-forward-order) layer.
+            auto it = this->layers.end() - 1;
+            for(; it != this->layers.begin(); it--)
+            {
+                bool is_output = (it == this->layers.end() - 1);
+                (*it)->backpropagate(y_expected, y_predicted, -loss, learning_rate * 10, is_output, is_output ? nullptr : *(it + 1));
+            }
+        }
+
+        average_loss = -(1.0 / (2.0 * samples)) * total_loss; // mean negative log-likelihood, with an extra factor of 1/2
+
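+        // Restart heuristic: if the loss got noticeably worse while still
+        // high, re-randomize every trainable layer and start over.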
+        if(average_loss > lowest_loss + 0.02 && average_loss > 0.1)
+        {
+            for(int l = 1; l < this->layers.size(); l++)
+            {
+                this->layers[l]->randomizeVals();
+            }
+            lowest_loss = 9999;
+        }
+
+        if(fabs(average_loss) < lowest_loss)
+            lowest_loss = fabs(average_loss);
+
+    }
+
+}
\ No newline at end of file
diff --git a/NeuralNetwork.h b/NeuralNetwork.h
new file mode 100644
index 0000000000000000000000000000000000000000..a3c66ee4af829bf6c28cff96ca872ff19bac2e11
--- /dev/null
+++ b/NeuralNetwork.h
@@ -0,0 +1,33 @@
+#ifndef NEURALNETWORK_H
+#define NEURALNETWORK_H
+
+#include <iostream>
+#include <iomanip>
+#include <vector>
+#include <initializer_list>
+#include <math.h>
+
+#include "Layer.h"
+#include "ActivationFunction.h"
+#include "Matrix.h"
+#include "TrainingData.h"
+
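+// Example usage (a sketch for a 2-4-1 network on caller-owned samples):
+//
+//   NeuralNetwork nn({2, 4, 1});
+//   Dataset ds{new TrainingData({0, 0}, {0}), new TrainingData({1, 1}, {1})};
+//   nn.train(ds, 0.01);
+//   double prediction = nn.run({1, 1});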
+class NeuralNetwork 
+{
+public:
+    NeuralNetwork(std::vector<int>);
+    ~NeuralNetwork();
+
+    std::vector<Layer*> getLayers() const;
+
+    double run(std::vector<double>);
+
+    void train(Dataset, double);
+
+private:
+    std::vector<Layer*> layers;
+};
+
+#include "NeuralNetwork.cc"
+
+#endif  // NEURALNETWORK_H
diff --git a/Neuron.cc b/Neuron.cc
new file mode 100644
index 0000000000000000000000000000000000000000..09b7997478cc4c30136c043d80371107b14f658c
--- /dev/null
+++ b/Neuron.cc
@@ -0,0 +1,64 @@
+Neuron::Neuron(std::function<double(double)> act_func, std::function<double(double)> deriv_func)
+    : z_val{}, a_val{}, bias{(rand() % 10000) / 10000.0}, activation_function{act_func}, derivative_function{deriv_func}
+{}
+
+Neuron::~Neuron()
+{}
+
+double Neuron::getVal() const
+{
+    return this->a_val;
+}
+
+void Neuron::setVal(double v)
+{
+    this->a_val = v;
+}
+
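+// Derivative of the activation function, evaluated at the stored
+// pre-activation z (set by the last call to calculate()).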
+double Neuron::getDerivative() const
+{
+    return this->derivative_function(this->z_val);
+}
+
+std::vector<double> Neuron::getInputs() const
+{
+    return this->inputs;
+}
+
+void Neuron::setBias(double v)
+{
+    this->bias = v;
+}
+
+void Neuron::adjustBias(double delta)
+{
+    this->bias -= delta;
+}
+
+void Neuron::randomizeBias()
+{
+    this->bias = (rand() % 10000) / 10000.0;
+}
+
+double Neuron::calculate(std::vector<double> inputs, std::vector<double> weights)
+{
+    if(inputs.size() != weights.size())
+    {
+        std::cerr << "ERROR: Input and Weight is not the same size -> " << inputs.size() << " and " << weights.size() << std::endl;
+        return 0;
+    }
+
+    double z{0}; // sum of weighted inputs + bias
+
+    for(int i = 0; i < inputs.size(); i++)
+    {
+        z += inputs[i] * weights[i];
+    }
+
+    this->z_val = z + this->bias;
+    this->a_val = this->activation_function(this->z_val);
+    this->inputs = inputs;
+
+    return this->a_val;
+}
\ No newline at end of file
diff --git a/Neuron.h b/Neuron.h
new file mode 100644
index 0000000000000000000000000000000000000000..58551d25073eedf0edd5bed505f09bfde93450b9
--- /dev/null
+++ b/Neuron.h
@@ -0,0 +1,38 @@
+#ifndef NEURON_H
+#define NEURON_H
+
+#include <iostream>     // std::cerr in Neuron.cc
+#include <vector>
+#include <cstdlib>      // rand
+#include <functional>
+
+class Neuron
+{
+public:
+    Neuron(std::function<double(double)>, std::function<double(double)>);
+    ~Neuron();
+
+    double getVal() const;
+    void setVal(double);
+
+    double getDerivative() const;
+
+    std::vector<double> getInputs() const;
+
+    void setBias(double);
+    void adjustBias(double);
+    void randomizeBias();
+
+    double calculate(std::vector<double>, std::vector<double>);
+
+private:
+    double z_val;   // weighted val + bias
+    double a_val;   // activated z_val
+    std::vector<double> inputs;
+    double bias;
+    std::function<double(double)> activation_function;
+    std::function<double(double)> derivative_function;
+
+};
+
+#include "Neuron.cc"
+
+#endif  // NEURON_H
\ No newline at end of file
diff --git a/TrainingData.cc b/TrainingData.cc
new file mode 100644
index 0000000000000000000000000000000000000000..f52f208b72c29da2545a1842f2aa341001ef0179
--- /dev/null
+++ b/TrainingData.cc
@@ -0,0 +1,13 @@
+TrainingData::TrainingData(std::vector<double> in, std::vector<double> out)
+    : inputs{in}, outputs{out}
+{}
+
+std::vector<double> TrainingData::getInputs() const
+{
+    return this->inputs;
+}
+
+std::vector<double> TrainingData::getOutputs() const
+{
+    return this->outputs;
+}
\ No newline at end of file
diff --git a/TrainingData.h b/TrainingData.h
new file mode 100644
index 0000000000000000000000000000000000000000..5ee4efd668fd8fe03b895a635daf41126cc51530
--- /dev/null
+++ b/TrainingData.h
@@ -0,0 +1,25 @@
+#ifndef TRAININGDATA_H
+#define TRAININGDATA_H
+
+#include <vector>
+
+class TrainingData
+{
+public:
+    TrainingData(std::vector<double>, std::vector<double>);
+
+    std::vector<double> getInputs() const;
+    std::vector<double> getOutputs() const;
+
+private:
+    std::vector<double> inputs;
+    std::vector<double> outputs;
+
+};
+
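+// A Dataset is a list of samples; the caller owns the pointers.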
+typedef std::vector<TrainingData*> Dataset;
+
+#include "TrainingData.cc"
+
+#endif  // TRAININGDATA_H
\ No newline at end of file
diff --git a/main.cc b/main.cc
new file mode 100644
index 0000000000000000000000000000000000000000..acabe62f37803c778d7693cdb45be4311bb6ef55
--- /dev/null
+++ b/main.cc
@@ -0,0 +1,84 @@
+#include <iostream>
+#include <iomanip>
+#include <time.h>    // time, for seeding rand
+#include <cstdlib>   // atoi, srand
+
+#include "NeuralNetwork.h"
+#include "TrainingData.h"
+
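+// Builds a network from the layer sizes given on the command line, reads a
+// training set interactively, trains it, then answers queries until EOF.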
+int main(int argc, char* argv[])
+{
+
+    if(argc < 3)
+    {
+        std::cerr << "ERROR: Enter sizes of atleast 2 layers!" <<  std::endl;
+        std::cerr << "USAGE: " << argv[0] << " 2 4 1" << std::endl;
+        return 1;
+    }
+
+    std::vector<int> layer_sizes{};
+
+    for(int i = 1; i < argc; i++)
+    {
+        layer_sizes.push_back(atoi(argv[i]));
+    }
+
+    std::ios_base::sync_with_stdio(false);
+    srand(time(nullptr));
+
+    NeuralNetwork* nn = new NeuralNetwork(layer_sizes);
+
+    double user_in{};
+
+    Dataset ds{};
+    int n_samples;
+
+    std::cout << "Enter number of samples for training dataset: ";
+    std::cin >> n_samples;
+    for(int i = 0; i < n_samples; i++)
+    {
+        std::cout << "Entering trainig data!" << std::endl;
+        std::vector<double> user_inputs{};
+        std::vector<double> user_outputs{};
+        
+        for(int j = 0; j < layer_sizes[0]; j++)
+        {
+            std::cout << "Input " << j + 1 << ": ";
+            std::cin >> user_in;
+            user_inputs.push_back(user_in);
+        }
+
+        for(int j = 0; j < layer_sizes[layer_sizes.size() - 1]; j++)
+        {
+            std::cout << "Output " << j + 1 << ": ";
+            std::cin >> user_in;
+            user_outputs.push_back(user_in);
+        }
+        ds.push_back(new TrainingData(user_inputs, user_outputs));
+    }
+    
+    nn->train(ds, 0.01);
+
+    while(std::cin)
+    {
+        std::vector<double> inputs{};
+        std::cout << "Enter inputs (ie: 1 0 0 1): ";
+        for(int i = 0; i < layer_sizes[0]; i++)
+        {
+            double val{};
+            std::cin >> val;
+            inputs.push_back(val);
+        }
+        if(!std::cin)   // EOF or invalid input: stop so the cleanup below runs
+            break;
+        std::cin.ignore(1000, '\n');
+        std::cout << std::fixed << std::setprecision(4) << "Predicted output: " << nn->run(inputs) << std::endl << std::endl;
+    }
+
+    delete nn;
+
+    for(TrainingData* td : ds)
+    {
+        delete td;
+    }
+
+    return 0;
+}
\ No newline at end of file