Building a Neural Network from Scratch in Java

6 min read · Jun 2, 2024

Key Concepts of Neural Networks

1. Artificial Neuron: The basic unit of a neural network, modeled after biological neurons. It receives inputs, processes them, and produces an output.

2. Feedforward Neural Network: A type of neural network where connections between the nodes do not form a cycle. The information moves in only one direction — forward — from the input nodes, through the hidden nodes (if any), and to the output nodes. There are no cycles or loops in the network.

3. Backpropagation: A training algorithm for neural networks, used to minimize the error by adjusting the weights. It involves two phases:
— Forward Propagation: Passing the inputs through the network, layer by layer, to compute the output and its error against the expected value.
— Backward Propagation: Calculating the gradient of the loss function with respect to each weight by the chain rule, then updating the weights to reduce the error.
The parameters updated in the backward phase are the Weights and Biases: they are adjusted during training to minimize the error in the network’s predictions. Weights are applied to inputs, and biases are added to the weighted sum.

4. Activation Functions: Functions applied to the output of neurons to introduce non-linearity. Common functions include ReLU (Rectified Linear Unit), Sigmoid, and Tanh.

GitHub: the full source is in the nirmal1067/ai-hope repository, under ai-hope/src/main/org/ai/hope/matrix/nn on the main branch.

Code Breakdown

package org.ai.hope.matrix.nn;

import org.ai.hope.core.util.Logger;
import org.ai.hope.nn.Functions;

/**
 * A feed-forward network made of a chain of {@link MatrixLayer}s.
 *
 * <p>The network holds {@code hiddenLayers + 1} layers: one per hidden layer plus the
 * output layer. Each layer's output is fed as the input of the next one.
 */
public class MatrixNeuralNetwork {

    private MatrixLayer[] layers;

    /**
     * Builds the network and initializes every layer.
     *
     * @param inputs          number of input values
     * @param hiddenLayers    number of hidden layers; {@code 0} gives a single
     *                        input-to-output layer
     * @param hiddeLayerSizes size of each hidden layer; must hold at least
     *                        {@code hiddenLayers} entries when {@code hiddenLayers > 0}
     * @param outPuts         number of output values
     * @throws IllegalArgumentException if {@code hiddenLayers} is negative or the size
     *                                  array is too short
     */
    public MatrixNeuralNetwork(int inputs, int hiddenLayers, int[] hiddeLayerSizes, int outPuts) {
        if (hiddenLayers < 0) {
            throw new IllegalArgumentException("hiddenLayers must not be negative: " + hiddenLayers);
        }
        layers = new MatrixLayer[hiddenLayers + 1];
        initilizeLayers(inputs, hiddenLayers, hiddeLayerSizes, outPuts);
    }

    /**
     * Runs a forward pass through all layers.
     *
     * @param input the matrix handed to the first layer
     * @return the matrix produced by the last layer
     */
    public double[][] predict(double[][] input) {
        double[][] signal = input;
        for (MatrixLayer layer : layers) {
            signal = layer.predict(signal);
        }
        return signal;
    }

    /**
     * Performs one training step: a forward pass, then a backward pass from the last
     * layer to the first.
     *
     * @param inputs          the training input
     * @param expectedOutPuts the expected output for {@code inputs}
     * @param learningRate    passed through to every layer
     * @param momentum        passed through to every layer
     */
    public void train(double[][] inputs, double[][] expectedOutPuts, double learningRate, double momentum) {
        double[][] modelOutPut = this.predict(inputs);
        // The initial error is (expected - actual), element by element.
        double[][] errors = MatrixOperations.MatrixScalerOperation(expectedOutPuts, modelOutPut, Functions.SUBSTRACTION);

        // Each layer returns the error to hand to the layer before it.
        for (int i = layers.length - 1; i >= 0; i--) {
            errors = layers[i].train(errors, learningRate, momentum);
        }
    }

    /**
     * (Re)creates every layer. Layer {@code i} takes {@code inputs} values when it is the
     * first layer and {@code hiddeLayerSizes[i - 1]} otherwise; it produces
     * {@code outPuts} values when it is the last layer and {@code hiddeLayerSizes[i]}
     * otherwise. A network with no hidden layer therefore maps inputs directly to outputs.
     *
     * @param inputs          number of input values
     * @param hiddenLayers    not read here; the layer count was fixed by the constructor
     * @param hiddeLayerSizes size of each hidden layer
     * @param outPuts         number of output values
     * @throws IllegalArgumentException if {@code hiddeLayerSizes} has fewer entries than
     *                                  there are hidden layers
     */
    public void initilizeLayers(int inputs, int hiddenLayers, int[] hiddeLayerSizes, int outPuts) {
        int hiddenCount = layers.length - 1;
        if (hiddenCount > 0 && (hiddeLayerSizes == null || hiddeLayerSizes.length < hiddenCount)) {
            throw new IllegalArgumentException(
                    "Expected " + hiddenCount + " hidden layer sizes but got "
                            + (hiddeLayerSizes == null ? "null" : String.valueOf(hiddeLayerSizes.length)));
        }

        for (int i = 0; i < layers.length; i++) {
            Logger.debug("Initializtaion for layer "+ i);
            int fanIn = (i == 0) ? inputs : hiddeLayerSizes[i - 1];
            int fanOut = (i == layers.length - 1) ? outPuts : hiddeLayerSizes[i];
            layers[i] = new MatrixLayer(fanIn, fanOut, i);
        }
    }

}

MatrixLayer

package org.ai.hope.matrix.nn;

import java.util.Random;

import org.ai.hope.core.util.Logger;
import org.ai.hope.nn.Functions;

public class MatrixLayer {

 private double[][] weights;

 private double[][] dWeights;

 private double[][] input;

 private double[][] output;
 
 private int layer=0;

 private Random random = new Random();

 public MatrixLayer(int inputs, int outputs,int layerNumber) {
  int size = 1 + inputs;
  this.layer = layerNumber;
  
  // Following matrix multiplication rules
  this.weights = new double[size][outputs];
  this.input = new double[1][size];
  this.dWeights = new double[size][outputs];
  this.output = new double[1][outputs];

  // intitialize Random weights
  this.initializeLayer();

 }

 private void initializeLayer() {
  for (int r = 0; r < weights.length; r++) {
   for (int c = 0; c < weights[0].length; c++) {
    weights[r][c] = (random.nextDouble() - 0.5d) * 4d;
   }
  }
  Logger.debug("Layer Number: "+ this.layer  +" while initlializing Input network size: " + input.length+","+input[0].length + " Output Layer Size: " + output.length +","+output[0].length
    + " Weights size " + weights.length+","+weights[0].length);
 }

 public double[][] predict(double[][] inputs) {
  //System.arraycopy(inputs, 0, input, 0, inputs.length);
  MatrixOperations.DeepCopyingValues(inputs, input);
  input[0][input[0].length - 1] = 1;
  output = MatrixOperations.MatrixMultiplication(input, weights);
  output = MatrixOperations.ApplyActivationFunction(output, Functions.SIGMOID);
  Logger.debug("Layer Number: " + this.layer + " Printing output below");
  Logger.printMatrix(output);
  return MatrixOperations.DeepCopy(output);
 }

 public double[][] train(double[][] errors, double learningRate, double momentum) {
  // TODO Auto-generated method stub
  double[][] nextLayerError = nextLayerError(errors,learningRate,momentum);
  double[][] internalOutputs = this.output;
  
  
  // Check dimension of matrix before operations we may need transpose in few cases
  double[][] tempMatrix1 = MatrixOperations.ApplyActivationFunction(internalOutputs, Functions.DSIGMOID);
  tempMatrix1 = MatrixOperations.MatrixScalerOperation(tempMatrix1, learningRate, Functions.MULTIPLY);
  double[] [] tempMatrix2 = MatrixOperations.MatrixScalerOperation(errors, learningRate, Functions.MULTIPLY);
  tempMatrix2 = MatrixOperations.MatrixScalerOperation(tempMatrix1, tempMatrix2, Functions.MULTIPLY);
  
  // This is have correct diemnesion of weight matrix while multiplication
  tempMatrix1 = MatrixOperations.matrixTranspose(input);
  
  tempMatrix2 = MatrixOperations.MatrixMultiplication(tempMatrix1, tempMatrix2);
  
  double[][] previousDerivativeWithDelta = MatrixOperations.MatrixScalerOperation(dWeights, momentum, Functions.MULTIPLY);
  
  previousDerivativeWithDelta = MatrixOperations.MatrixScalerOperation(tempMatrix2, previousDerivativeWithDelta, Functions.ADDITION);
  this.weights = MatrixOperations.MatrixScalerOperation(weights, previousDerivativeWithDelta, Functions.ADDITION);
  dWeights = tempMatrix2;
  
  Logger.debug("Layer Number: " + this.layer + " Printing weights below");
  Logger.printMatrix(weights);
  Logger.debug("Dweights");
  Logger.printMatrix(dWeights);
  return nextLayerError;
 }
 

 private double[][] performAdjustmentByIncreasing(double[][] internalOutputs, double[][] errors) {
  double[][] finalArray = new double[errors.length][errors[0].length];
  
  for(int r=0;r<finalArray.length;r++)
  {
   for(int c=0;c<finalArray[0].length;c++)
   {
    if(c==finalArray.length-1)
    {
     finalArray[r][c]=1;
     continue;
    }
    
    finalArray[r][c]= errors[r][c];
   }
  }
  return finalArray;
 }
 
 
 private double[][] performAdjustmentByDecreasing(double[][] internalOutputs) {
  double[][] finalArray = new double[internalOutputs.length-1][internalOutputs[0].length];
  
  for(int r=0;r<finalArray.length;r++)
  {
   for(int c=0;c<finalArray[0].length;c++)
   {
    finalArray[r][c]= internalOutputs[r][c];
   }
  }
  return finalArray;
 }

 private double[][] nextLayerError(double[][] errors, double learningRate, double momentum)
 {
  double[][] result  = new double[input.length][input[0].length];
  
  //if(this.layer>0)
 // {
   double[][] tempMatrix1 = MatrixOperations.ApplyActivationFunction(output, Functions.DSIGMOID);
   tempMatrix1 = MatrixOperations.MatrixScalerOperation(errors, tempMatrix1, Functions.MULTIPLY);
   Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
   // Transpose output as per matrix rules to get correct next layer error.
   
   //tempMatrix1 = MatrixOperations.matrixTranspose(tempMatrix1);
   double[] [] actualWeights = performAdjustmentByDecreasing(weights);
   double[][] tempMatrix2 = MatrixOperations.matrixTranspose(actualWeights);
   Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
   tempMatrix1 = MatrixOperations.MatrixMultiplication(tempMatrix1, tempMatrix2);
   
   Logger.printMatrixDimensions("ErrorMatrix", tempMatrix1);
   Logger.debug("Print Error matrix before Transpose");
   Logger.printMatrix(tempMatrix1);
   //result = MatrixOperations.matrixTranspose(tempMatrix1);
   result = tempMatrix1;
   Logger.debug("Layer Number: " + this.layer + " Printing error for previous Layer");
   Logger.printMatrix(result);
  //}
  
  
  return result;
  
 }

}

MatrixOperations

package org.ai.hope.matrix.nn;

import java.util.function.BiFunction;
import java.util.function.Function;

/**
 * Static helpers for dense {@code double[][]} matrices. Matrices are treated as
 * rectangular: the width of the first row is taken as the width of every row.
 */
public class MatrixOperations {

    /**
     * Multiplies {@code input} ({@code m x k}) by {@code weights} ({@code k x n}).
     *
     * @return a new {@code m x n} matrix
     * @throws IllegalArgumentException if either matrix is null or empty, or the column
     *                                  count of {@code input} differs from the row count
     *                                  of {@code weights}
     */
    public static double[][] MatrixMultiplication(double[][] input, double[][] weights) {
        requireNonEmpty(input);
        requireNonEmpty(weights);

        int inner = input[0].length;
        if (inner != weights.length) {
            // Without this check surplus rows of the right operand would be silently ignored.
            throw new IllegalArgumentException(
                    "Cannot multiply a " + input.length + "x" + inner + " matrix by a "
                            + weights.length + "x" + weights[0].length + " matrix");
        }

        int rows = input.length;
        int cols = weights[0].length;
        double[][] resultMatrix = new double[rows][cols];

        for (int r = 0; r < rows; r++) {
            for (int c = 0; c < cols; c++) {
                double value = 0;
                for (int k = 0; k < inner; k++) {
                    value = value + input[r][k] * weights[k][c];
                }
                resultMatrix[r][c] = value;
            }
        }

        return resultMatrix;
    }

    /**
     * Returns the transpose of {@code input} as a new matrix.
     *
     * @throws IllegalArgumentException if {@code input} is null or empty
     */
    public static double[][] matrixTranspose(double[][] input) {
        requireNonEmpty(input);

        double[][] result = new double[input[0].length][input.length];

        for (int r = 0; r < input.length; r++) {
            for (int c = 0; c < input[0].length; c++) {
                result[c][r] = input[r][c];
            }
        }

        return result;
    }

    /**
     * Applies {@code activationFunction} to every element and returns the results in a new matrix.
     *
     * @throws IllegalArgumentException if {@code matrix} is null or empty
     */
    public static double[][] ApplyActivationFunction(double[][] matrix, Function<Double, Double> activationFunction) {
        requireNonEmpty(matrix);

        double[][] resultMatrix = new double[matrix.length][matrix[0].length];
        for (int r = 0; r < matrix.length; r++) {
            for (int c = 0; c < matrix[0].length; c++) {
                resultMatrix[r][c] = activationFunction.apply(matrix[r][c]);
            }
        }

        return resultMatrix;
    }

    /**
     * Combines two equally shaped matrices element by element:
     * {@code result[r][c] = opertaion(left[r][c], right[r][c])}.
     *
     * @throws IllegalArgumentException if either matrix is null or empty, or the shapes differ
     */
    public static double[][] MatrixScalerOperation(double[][] leftMatrix, double[][] rightMatrix, BiFunction<Double, Double, Double> opertaion) {
        requireNonEmpty(leftMatrix);
        requireNonEmpty(rightMatrix);
        if (leftMatrix.length != rightMatrix.length || leftMatrix[0].length != rightMatrix[0].length) {
            throw new IllegalArgumentException(
                    "Matrix shapes differ: " + leftMatrix.length + "x" + leftMatrix[0].length
                            + " vs " + rightMatrix.length + "x" + rightMatrix[0].length);
        }

        double[][] resultMatrix = new double[leftMatrix.length][leftMatrix[0].length];

        for (int r = 0; r < resultMatrix.length; r++) {
            for (int c = 0; c < resultMatrix[0].length; c++) {
                resultMatrix[r][c] = opertaion.apply(leftMatrix[r][c], rightMatrix[r][c]);
            }
        }

        return resultMatrix;
    }

    /**
     * Combines every element with a scalar:
     * {@code result[r][c] = opertaion(left[r][c], value)}.
     *
     * @throws IllegalArgumentException if {@code leftMatrix} is null or empty
     */
    public static double[][] MatrixScalerOperation(double[][] leftMatrix, double value, BiFunction<Double, Double, Double> opertaion) {
        requireNonEmpty(leftMatrix);

        double[][] resultMatrix = new double[leftMatrix.length][leftMatrix[0].length];

        for (int r = 0; r < resultMatrix.length; r++) {
            for (int c = 0; c < resultMatrix[0].length; c++) {
                resultMatrix[r][c] = opertaion.apply(leftMatrix[r][c], value);
            }
        }

        return resultMatrix;
    }

    /**
     * Returns an independent copy of {@code source}. A matrix with no rows yields an
     * empty matrix.
     */
    public static double[][] DeepCopy(double[][] source) {
        if (source.length == 0) {
            return new double[0][0];
        }

        int width = source[0].length;
        double[][] resultMatrix = new double[source.length][width];
        for (int r = 0; r < source.length; r++) {
            System.arraycopy(source[r], 0, resultMatrix[r], 0, width);
        }

        return resultMatrix;
    }

    /**
     * Copies the values of {@code source} into the top-left corner of {@code destination}.
     * {@code destination} may be larger than {@code source}; cells outside the copied
     * area are left untouched.
     *
     * @return {@code destination}
     */
    public static double[][] DeepCopyingValues(double[][] source, double[][] destination) {
        for (int r = 0; r < source.length; r++) {
            System.arraycopy(source[r], 0, destination[r], 0, source[0].length);
        }

        return destination;
    }

    /** Rejects null matrices and matrices without any row or column. */
    private static void requireNonEmpty(double[][] matrix) {
        if (matrix == null || matrix.length == 0 || matrix[0] == null || matrix[0].length == 0) {
            throw new IllegalArgumentException("Please provide proper matrix as input");
        }
    }

}

Test to Run Neural Network

package org.ai.hope.alogrithm.runner;

import java.util.List;

import org.ai.hope.core.util.Logger;
import org.ai.hope.matrix.nn.MatrixNeuralNetwork;
import org.ai.hope.nn.NeuralNetwork;



/**
 * Command-line demo that trains a {@link MatrixNeuralNetwork} and logs its predictions
 * after every epoch.
 */
public class MatrixNeuralNetworkRunner {
    public static final double LEARNING_RATE = 0.3d;
    public static final double MOMENTUM = 0.6d;
    public static final int ITERATIONS = 100000;

    /**
     * Trains a 2-50-50-1 network on the single sample {@code (0, 0) -> 0} and logs the
     * prediction of that same network after each epoch.
     */
    public static void singleSetTest() {
        double[][] trainingData = { { 0, 0 } };
        double[][] trainingResults = { { 0 } };

        int[] hiddenLayerSize = { 50, 50 };
        //Logger.ENABLE_DEBUG_MODE=true;
        MatrixNeuralNetwork matrixNeuralNetwork = new MatrixNeuralNetwork(2, 2, hiddenLayerSize, 1);

        trainAndReport(matrixNeuralNetwork, trainingData, trainingResults);
    }

    /** Trains a 2-3-1 network on the XOR truth table and logs its predictions after each epoch. */
    public static void main(String[] args) {
        double[][] trainingData = {
            { 0, 0 },
            { 0, 1 },
            { 1, 0 },
            { 1, 1 }
        };

        double[][] trainingResults = {
            { 0 },
            { 1 },
            { 1 },
            { 0 }
        };

        int[] hiddenLayerSize = { 3 };
        //Logger.ENABLE_DEBUG_MODE=true;
        MatrixNeuralNetwork matrixNeuralNetwork = new MatrixNeuralNetwork(2, 1, hiddenLayerSize, 1);

        trainAndReport(matrixNeuralNetwork, trainingData, trainingResults);
    }

    /**
     * Runs {@link #ITERATIONS} epochs. Each epoch trains on every sample once, one row at a
     * time, then logs the network's prediction for every sample.
     */
    private static void trainAndReport(MatrixNeuralNetwork network, double[][] trainingData, double[][] trainingResults) {
        for (int iterations = 0; iterations < ITERATIONS; iterations++) {

            for (int i = 0; i < trainingResults.length; i++) {
                network.train(asRowMatrix(trainingData[i]), asRowMatrix(trainingResults[i]),
                        LEARNING_RATE, MOMENTUM);
            }

            System.out.println();
            for (int i = 0; i < trainingResults.length; i++) {
                double[] sample = trainingData[i];
                // Parentheses matter: without them "+ 1" is appended as text ("Epoch: 01").
                Logger.info("Epoch: " + (iterations + 1));
                Logger.info(" Training data " + sample[0] + "," + sample[1] + " Actual data:  "
                        + network.predict(asRowMatrix(sample))[0][0]);
            }
        }
    }

    /** Wraps a copy of {@code row} into a one-row matrix, the shape the network works on. */
    private static double[][] asRowMatrix(double[] row) {
        return new double[][] { row.clone() };
    }

}

Building a Neural Network from Scratch in Java

Written by Learner1067