Building a Neural Network from Scratch in Java

Learner1067
6 min read · Jun 2, 2024

--

Key Concepts of Neural Networks

1. Artificial Neuron: The basic unit of a neural network, modeled after biological neurons. It receives inputs, processes them, and produces an output.

2. Feedforward Neural Network: A type of neural network where connections between the nodes do not form a cycle. The information moves in only one direction — forward — from the input nodes, through the hidden nodes (if any), and to the output nodes. There are no cycles or loops in the network.

3. Backpropagation: A training algorithm for neural networks, used to minimize the error by adjusting the weights. It involves two phases:
Forward Propagation: Passing the input through the network, layer by layer, to compute the output and its error against the expected output.
Backward Propagation: Calculating the gradient of the loss function with respect to each weight by the chain rule, then updating the weights to reduce the error.

Weights and Biases: Parameters that are adjusted during training to minimize the error in the network’s predictions. Weights are applied to inputs, and biases are added to the weighted sum.

4. Activation Functions: Functions applied to the output of neurons to introduce non-linearity. Common functions include ReLU (Rectified Linear Unit), Sigmoid, and Tanh.

GitHub : ai-hope/src/main/org/ai/hope/matrix/nn at main · nirmal1067/ai-hope · GitHub

Code Breakdown

package org.ai.hope.matrix.nn;

import org.ai.hope.core.util.Logger;
import org.ai.hope.nn.Functions;

/**
 * A feed-forward network assembled from a chain of {@link MatrixLayer}s.
 *
 * <p>The network owns {@code hiddenLayers + 1} layers: one per hidden layer plus the output
 * layer. Every matrix passed to {@link #predict} and {@link #train} is handed straight to the
 * layers, which work on a single sample laid out as one row.
 */
public class MatrixNeuralNetwork {

    /** Layers in forward order; the last element is the output layer. */
    private final MatrixLayer[] layers;

    /**
     * Builds the network and randomly initialises every layer.
     *
     * @param inputs          number of input features
     * @param hiddenLayers    number of hidden layers; may be 0 for a network with only an output layer
     * @param hiddeLayerSizes neuron count of each hidden layer; needs at least {@code hiddenLayers} entries
     * @param outPuts         number of output neurons
     * @throws IllegalArgumentException if {@code hiddenLayers} is negative or
     *                                  {@code hiddeLayerSizes} has too few entries
     */
    public MatrixNeuralNetwork(int inputs, int hiddenLayers, int[] hiddeLayerSizes, int outPuts) {
        if (hiddenLayers < 0) {
            throw new IllegalArgumentException("hiddenLayers must not be negative: " + hiddenLayers);
        }
        layers = new MatrixLayer[hiddenLayers + 1];
        initilizeLayers(inputs, hiddenLayers, hiddeLayerSizes, outPuts);
    }

    /**
     * Runs a forward pass, feeding each layer's output into the next layer.
     *
     * @param input the sample to evaluate, as passed to the first layer
     * @return the output of the last layer
     */
    public double[][] predict(double[][] input) {
        double[][] activations = input;
        for (MatrixLayer layer : layers) {
            activations = layer.predict(activations);
        }
        return activations;
    }

    /**
     * Performs one training step: a forward pass followed by a backward pass in which every
     * layer, from last to first, adjusts its weights and returns the error for the layer before it.
     *
     * @param inputs          the training sample
     * @param expectedOutPuts the expected network output for that sample
     * @param learningRate    step size forwarded to every layer
     * @param momentum        momentum factor forwarded to every layer
     */
    public void train(double[][] inputs, double[][] expectedOutPuts, double learningRate, double momentum) {
        double[][] modelOutPut = this.predict(inputs);
        // Output error; Functions.SUBSTRACTION presumably yields (expected - actual) -- confirm in Functions.
        double[][] errors = MatrixOperations.MatrixScalerOperation(expectedOutPuts, modelOutPut, Functions.SUBSTRACTION);

        for (int i = layers.length - 1; i >= 0; i--) {
            errors = layers[i].train(errors, learningRate, momentum);
        }
    }

    /**
     * Creates every layer with the right fan-in and fan-out.
     *
     * <p>Layer {@code i} reads from the input (for {@code i == 0}) or from hidden layer
     * {@code i - 1}, and writes to hidden layer {@code i} or, for the last layer, to the outputs.
     * When there are no hidden layers the single layer maps the inputs directly to the outputs.
     * The number of layers is taken from the array allocated by the constructor, not from
     * {@code hiddenLayers}.
     *
     * @param inputs          number of input features
     * @param hiddenLayers    number of hidden layers (not consulted here)
     * @param hiddeLayerSizes neuron count of each hidden layer
     * @param outPuts         number of output neurons
     * @throws IllegalArgumentException if {@code hiddeLayerSizes} has fewer entries than there are hidden layers
     */
    public void initilizeLayers(int inputs, int hiddenLayers, int[] hiddeLayerSizes, int outPuts) {
        int lastIndex = layers.length - 1;
        if (lastIndex > 0 && (hiddeLayerSizes == null || hiddeLayerSizes.length < lastIndex)) {
            throw new IllegalArgumentException(
                    "Expected " + lastIndex + " hidden layer sizes but got "
                            + (hiddeLayerSizes == null ? 0 : hiddeLayerSizes.length));
        }

        for (int i = 0; i <= lastIndex; i++) {
            Logger.debug("Initializtaion for layer " + i);
            int fanIn = (i == 0) ? inputs : hiddeLayerSizes[i - 1];
            int fanOut = (i == lastIndex) ? outPuts : hiddeLayerSizes[i];
            layers[i] = new MatrixLayer(fanIn, fanOut, i);
        }
    }

}

MatrixLayer

package org.ai.hope.matrix.nn;

import java.util.Random;

import org.ai.hope.core.util.Logger;
import org.ai.hope.nn.Functions;

public class MatrixLayer {

private double[][] weights;

private double[][] dWeights;

private double[][] input;

private double[][] output;

private int layer=0;

private Random random = new Random();

public MatrixLayer(int inputs, int outputs,int layerNumber) {
int size = 1 + inputs;
this.layer = layerNumber;

// Following matrix multiplication rules
this.weights = new double[size][outputs];
this.input = new double[1][size];
this.dWeights = new double[size][outputs];
this.output = new double[1][outputs];

// intitialize Random weights
this.initializeLayer();

}

private void initializeLayer() {
for (int r = 0; r < weights.length; r++) {
for (int c = 0; c < weights[0].length; c++) {
weights[r][c] = (random.nextDouble() - 0.5d) * 4d;
}
}
Logger.debug("Layer Number: "+ this.layer +" while initlializing Input network size: " + input.length+","+input[0].length + " Output Layer Size: " + output.length +","+output[0].length
+ " Weights size " + weights.length+","+weights[0].length);
}

public double[][] predict(double[][] inputs) {
//System.arraycopy(inputs, 0, input, 0, inputs.length);
MatrixOperations.DeepCopyingValues(inputs, input);
input[0][input[0].length - 1] = 1;
output = MatrixOperations.MatrixMultiplication(input, weights);
output = MatrixOperations.ApplyActivationFunction(output, Functions.SIGMOID);
Logger.debug("Layer Number: " + this.layer + " Printing output below");
Logger.printMatrix(output);
return MatrixOperations.DeepCopy(output);
}

public double[][] train(double[][] errors, double learningRate, double momentum) {
// TODO Auto-generated method stub
double[][] nextLayerError = nextLayerError(errors,learningRate,momentum);
double[][] internalOutputs = this.output;


// Check dimension of matrix before operations we may need transpose in few cases
double[][] tempMatrix1 = MatrixOperations.ApplyActivationFunction(internalOutputs, Functions.DSIGMOID);
tempMatrix1 = MatrixOperations.MatrixScalerOperation(tempMatrix1, learningRate, Functions.MULTIPLY);
double[] [] tempMatrix2 = MatrixOperations.MatrixScalerOperation(errors, learningRate, Functions.MULTIPLY);
tempMatrix2 = MatrixOperations.MatrixScalerOperation(tempMatrix1, tempMatrix2, Functions.MULTIPLY);

// This is have correct diemnesion of weight matrix while multiplication
tempMatrix1 = MatrixOperations.matrixTranspose(input);

tempMatrix2 = MatrixOperations.MatrixMultiplication(tempMatrix1, tempMatrix2);

double[][] previousDerivativeWithDelta = MatrixOperations.MatrixScalerOperation(dWeights, momentum, Functions.MULTIPLY);

previousDerivativeWithDelta = MatrixOperations.MatrixScalerOperation(tempMatrix2, previousDerivativeWithDelta, Functions.ADDITION);
this.weights = MatrixOperations.MatrixScalerOperation(weights, previousDerivativeWithDelta, Functions.ADDITION);
dWeights = tempMatrix2;

Logger.debug("Layer Number: " + this.layer + " Printing weights below");
Logger.printMatrix(weights);
Logger.debug("Dweights");
Logger.printMatrix(dWeights);
return nextLayerError;
}


private double[][] performAdjustmentByIncreasing(double[][] internalOutputs, double[][] errors) {
double[][] finalArray = new double[errors.length][errors[0].length];

for(int r=0;r<finalArray.length;r++)
{
for(int c=0;c<finalArray[0].length;c++)
{
if(c==finalArray.length-1)
{
finalArray[r][c]=1;
continue;
}

finalArray[r][c]= errors[r][c];
}
}
return finalArray;
}


private double[][] performAdjustmentByDecreasing(double[][] internalOutputs) {
double[][] finalArray = new double[internalOutputs.length-1][internalOutputs[0].length];

for(int r=0;r<finalArray.length;r++)
{
for(int c=0;c<finalArray[0].length;c++)
{
finalArray[r][c]= internalOutputs[r][c];
}
}
return finalArray;
}

private double[][] nextLayerError(double[][] errors, double learningRate, double momentum)
{
double[][] result = new double[input.length][input[0].length];

//if(this.layer>0)
// {
double[][] tempMatrix1 = MatrixOperations.ApplyActivationFunction(output, Functions.DSIGMOID);
tempMatrix1 = MatrixOperations.MatrixScalerOperation(errors, tempMatrix1, Functions.MULTIPLY);
Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
// Transpose output as per matrix rules to get correct next layer error.

//tempMatrix1 = MatrixOperations.matrixTranspose(tempMatrix1);
double[] [] actualWeights = performAdjustmentByDecreasing(weights);
double[][] tempMatrix2 = MatrixOperations.matrixTranspose(actualWeights);
Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
tempMatrix1 = MatrixOperations.MatrixMultiplication(tempMatrix1, tempMatrix2);

Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
Logger.debug("Print Error matrix before Transpose");
Logger.printMatrix(tempMatrix1);
//result = MatrixOperations.matrixTranspose(tempMatrix1);
result = tempMatrix1;
Logger.debug("Layer Number: " + this.layer + " Printing error for previous Layer");
Logger.printMatrix(result);
//}


return result;

}

}

MatrixOperations

package org.ai.hope.matrix.nn;

import java.util.function.BiFunction;
import java.util.function.Function;

/**
 * Static helpers for dense, rectangular {@code double[][]} matrices stored row by row.
 *
 * <p>All methods take the column count from the first row, so callers are expected to pass
 * rectangular (non-jagged) arrays. Invalid arguments are reported with
 * {@link IllegalArgumentException}.
 */
public class MatrixOperations {

    private static final String INVALID_MATRIX = "Please provide proper matrix as input";

    /** Rejects null matrices and matrices without a first row. */
    private static void requireRows(double[][] matrix) {
        if (matrix == null || matrix.length == 0 || matrix[0] == null) {
            throw new IllegalArgumentException(INVALID_MATRIX);
        }
    }

    /** Rejects null matrices and matrices with no rows or no columns. */
    private static void requireCells(double[][] matrix) {
        requireRows(matrix);
        if (matrix[0].length == 0) {
            throw new IllegalArgumentException(INVALID_MATRIX);
        }
    }

    /**
     * Computes the matrix product {@code input × weights} with the schoolbook triple loop.
     *
     * @param input   left operand, m x k
     * @param weights right operand, k x n
     * @return the m x n product
     * @throws IllegalArgumentException if either matrix is null or empty, or if the column
     *                                  count of {@code input} differs from the row count of {@code weights}
     */
    public static double[][] MatrixMultiplication(double[][] input, double[][] weights) {
        requireCells(input);
        requireCells(weights);
        int inner = input[0].length;
        if (inner != weights.length) {
            // Without this check a taller right operand would yield a silently wrong product.
            throw new IllegalArgumentException(
                    "Cannot multiply " + input.length + "x" + inner + " by "
                            + weights.length + "x" + weights[0].length);
        }

        int rows = input.length;
        int cols = weights[0].length;
        double[][] product = new double[rows][cols];
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                double sum = 0;
                for (int k = 0; k < inner; k++) {
                    sum = sum + input[r][k] * weights[k][c];
                }
                product[r][c] = sum;
            }
        }
        return product;
    }

    /**
     * Returns the transpose of a matrix.
     *
     * @param input matrix of shape m x n
     * @return a new n x m matrix with {@code result[c][r] == input[r][c]}
     * @throws IllegalArgumentException if the matrix is null or empty
     */
    public static double[][] matrixTranspose(double[][] input) {
        requireCells(input);

        int rows = input.length;
        int cols = input[0].length;
        double[][] transposed = new double[cols][rows];
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                transposed[c][r] = input[r][c];
            }
        }
        return transposed;
    }

    /**
     * Applies a function to every element and returns the results in a new matrix.
     *
     * @param matrix             source matrix, left unchanged
     * @param activationFunction function applied to each element
     * @return a new matrix of the same shape
     * @throws IllegalArgumentException if the matrix is null or has no rows
     */
    public static double[][] ApplyActivationFunction(double[][] matrix, Function<Double, Double> activationFunction) {
        requireRows(matrix);

        int rows = matrix.length;
        int cols = matrix[0].length;
        double[][] mapped = new double[rows][cols];
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                mapped[r][c] = activationFunction.apply(matrix[r][c]);
            }
        }
        return mapped;
    }

    /**
     * Combines two equally shaped matrices element by element.
     *
     * @param leftMatrix  left operand of every application
     * @param rightMatrix right operand of every application
     * @param opertaion   function receiving {@code (left[r][c], right[r][c])}
     * @return a new matrix of the same shape holding the results
     * @throws IllegalArgumentException if a matrix is null or has no rows, or if the shapes differ
     */
    public static double[][] MatrixScalerOperation(double[][] leftMatrix, double[][] rightMatrix, BiFunction<Double, Double, Double> opertaion) {
        requireRows(leftMatrix);
        requireRows(rightMatrix);
        int rows = leftMatrix.length;
        int cols = leftMatrix[0].length;
        if (rightMatrix.length != rows || rightMatrix[0].length != cols) {
            throw new IllegalArgumentException(
                    "Shape mismatch: " + rows + "x" + cols + " vs "
                            + rightMatrix.length + "x" + rightMatrix[0].length);
        }

        double[][] combined = new double[rows][cols];
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                combined[r][c] = opertaion.apply(leftMatrix[r][c], rightMatrix[r][c]);
            }
        }
        return combined;
    }

    /**
     * Combines every element of a matrix with the same scalar.
     *
     * @param leftMatrix matrix supplying the left operand of every application
     * @param value      scalar supplied as the right operand
     * @param opertaion  function receiving {@code (left[r][c], value)}
     * @return a new matrix of the same shape holding the results
     * @throws IllegalArgumentException if the matrix is null or has no rows
     */
    public static double[][] MatrixScalerOperation(double[][] leftMatrix, double value, BiFunction<Double, Double, Double> opertaion) {
        requireRows(leftMatrix);

        int rows = leftMatrix.length;
        int cols = leftMatrix[0].length;
        double[][] combined = new double[rows][cols];
        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                combined[r][c] = opertaion.apply(leftMatrix[r][c], value);
            }
        }
        return combined;
    }

    /**
     * Returns an independent copy of a matrix; changing the copy never affects the source.
     *
     * @param source matrix to copy
     * @return a new matrix with the same contents
     * @throws IllegalArgumentException if the matrix is null or has no rows
     */
    public static double[][] DeepCopy(double[][] source) {
        requireRows(source);

        double[][] copy = new double[source.length][];
        for (int r = 0; r < source.length; r++) {
            copy[r] = source[r].clone();
        }
        return copy;
    }

    /**
     * Copies the values of {@code source} into the top-left corner of {@code destination}.
     * The destination may be larger than the source; cells outside the copied area keep
     * their current values.
     *
     * @param source      matrix to read from
     * @param destination matrix to write into; must be at least as large as {@code source}
     * @return {@code destination}
     * @throws IllegalArgumentException if an argument is null or the destination is too small
     */
    public static double[][] DeepCopyingValues(double[][] source, double[][] destination) {
        if (source == null || destination == null) {
            throw new IllegalArgumentException(INVALID_MATRIX);
        }
        if (destination.length < source.length) {
            throw new IllegalArgumentException(
                    "Destination has " + destination.length + " rows but source has " + source.length);
        }
        // Validate every row first so a failure never leaves the destination half written.
        for (int r = 0; r < source.length; r++) {
            if (destination[r].length < source[r].length) {
                throw new IllegalArgumentException(
                        "Destination row " + r + " is shorter than the source row");
            }
        }
        for (int r = 0; r < source.length; r++) {
            System.arraycopy(source[r], 0, destination[r], 0, source[r].length);
        }
        return destination;
    }

}

Test to Run Neural Network

package org.ai.hope.alogrithm.runner;

import java.util.List;

import org.ai.hope.core.util.Logger;
import org.ai.hope.matrix.nn.MatrixNeuralNetwork;
import org.ai.hope.nn.NeuralNetwork;



/**
 * Command-line demo that trains a {@link MatrixNeuralNetwork} and logs its predictions
 * after every epoch.
 */
public class MatrixNeuralNetworkRunner {
    public static final double LEARNING_RATE = 0.3d;
    public static final double MOMENTUM = 0.6d;
    public static final int ITERATIONS = 100000;

    /**
     * Smoke test: trains a 2-50-50-1 network on the single sample (0, 0) -> 0 and logs the
     * prediction of that same network after each epoch.
     */
    public static void singleSetTest() {
        double[][] trainingData = {
                { 0, 0 }
        };
        double[][] trainingResults = {
                { 0 }
        };

        int[] hiddenLayerSize = {50, 50};
        //Logger.ENABLE_DEBUG_MODE=true;
        MatrixNeuralNetwork matrixNeuralNetwork = new MatrixNeuralNetwork(2, 2, hiddenLayerSize, 1);

        trainAndReport(matrixNeuralNetwork, trainingData, trainingResults);
    }

    /**
     * Trains a 2-3-1 network on the four rows of the XOR truth table and logs the prediction
     * for every row after each epoch.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        double[][] trainingData = {
                { 0, 0 },
                { 0, 1 },
                { 1, 0 },
                { 1, 1 }
        };
        double[][] trainingResults = {
                { 0 },
                { 1 },
                { 1 },
                { 0 }
        };

        int[] hiddenLayerSize = {3};
        //Logger.ENABLE_DEBUG_MODE=true;
        MatrixNeuralNetwork matrixNeuralNetwork = new MatrixNeuralNetwork(2, 1, hiddenLayerSize, 1);

        trainAndReport(matrixNeuralNetwork, trainingData, trainingResults);
    }

    /**
     * Runs {@link #ITERATIONS} epochs. Each epoch trains the network on every sample, one row
     * at a time, and then logs the network's prediction for every sample.
     *
     * @param network         network to train and query
     * @param trainingData    one sample per row; the first two values of each row are logged
     * @param trainingResults expected output per row, aligned with {@code trainingData}
     */
    private static void trainAndReport(MatrixNeuralNetwork network, double[][] trainingData,
            double[][] trainingResults) {
        for (int iterations = 0; iterations < ITERATIONS; iterations++) {

            for (int i = 0; i < trainingResults.length; i++) {
                // The network works on one sample at a time, presented as a single-row matrix.
                double[][] sample = { trainingData[i] };
                double[][] expected = { trainingResults[i] };
                network.train(sample, expected, LEARNING_RATE, MOMENTUM);
            }

            System.out.println();
            for (int i = 0; i < trainingResults.length; i++) {
                double[][] sample = { trainingData[i] };
                // Parentheses matter: without them the 1 is appended as text ("Epoch: 01").
                Logger.info("Epoch: " + (iterations + 1));
                Logger.info(" Training data " + sample[0][0] + "," + sample[0][1]
                        + " Actual data: " + network.predict(sample)[0][0]);
            }
        }
    }

}

--

--