Vanilla NN vs CNN

Jaabir
Published in featurepreneur
6 min read · Jan 6, 2022

Why not use a plain NN for image classification?

Although every dataset varies, CNNs generally work very well for image classification. Why is that? A CNN applies convolutions to the images along with pooling, and these layers act as feature extractors.

For example (see the short sketch after this list):

  • Each convolution layer applies the convolution operation to its input image (or to the feature maps produced by the previous layer).
  • Each convolution layer needs a number of filters, a kernel size and strides.
  • The learned filters tend to detect edges and other important features in the image.
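As a minimal sketch (not from the original notebook), the snippet below applies a single Conv2D layer followed by MaxPool2D to a dummy RGB image and prints the resulting shapes; the filter count, kernel size and strides are arbitrary illustration values.

import tensorflow as tf
from tensorflow.keras.layers import Conv2D, MaxPool2D

x = tf.random.normal((1, 28, 28, 3))  # one 28x28 RGB image
conv = Conv2D(filters = 32, kernel_size = (3, 3), strides = 1, padding = 'same')
pool = MaxPool2D(pool_size = (2, 2), strides = (2, 2))

features = conv(x)
print(features.shape)        # (1, 28, 28, 32) -> one feature map per filter
print(pool(features).shape)  # (1, 14, 14, 32) -> pooling halves the spatial size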

Let's look at a code example of why to use a CNN for an image dataset. Although there are many kinds of models that work with images, by default we should try a CNN rather than a plain NN.

Let's use this dataset for simplicity: https://www.kaggle.com/cactus3/basicshapes

I am only going to show the model building and the comparison, but if you'd like, you can see the whole notebook here: https://www.kaggle.com/muhammedjaabir/cnn-vs-vanilla-nn/notebook

Before starting, let's import the required modules.

import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras import Sequential
from tensorflow.keras.utils import plot_model, to_categorical
from tensorflow.keras.layers import Dense, Input, Dropout, BatchNormalization, Flatten
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow_addons as tfa

# used later for dataframes, arrays and plotting
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns


loss = keras.losses
metrics = keras.metrics

Also make sure to split the dataset into a train and a test/dev set. It's up to you; you can use any of these approaches to split the dataset:

  • Read all the images, flatten them and store them in a dataframe along with their class. The shape would be (n, m), where n is the number of images and m is the number of flattened pixel values per image.
  • Store the image paths in a dataframe with their class, then use sklearn's train_test_split to split the dataset and feed that to a Keras image generator (I have used this method; a rough sketch follows below).
  • Create separate folders for training and testing and use those in a Keras image generator.
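Here is a rough sketch of the second approach, the one used in the notebook. The column names, the rescaling and the generator settings are my assumptions for illustration, not copied from the original; only the resulting x_train_gen and x_dev_gen names match what the training code below expects.

import pandas as pd
from sklearn.model_selection import train_test_split
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# image_paths and labels are assumed to be lists collected from the dataset folders
df = pd.DataFrame({'path': image_paths, 'label': labels})
train_df, dev_df = train_test_split(df, test_size = 0.2, stratify = df['label'])

datagen = ImageDataGenerator(rescale = 1. / 255)
x_train_gen = datagen.flow_from_dataframe(train_df, x_col = 'path', y_col = 'label',
                                          target_size = (28, 28), class_mode = 'categorical')
x_dev_gen = datagen.flow_from_dataframe(dev_df, x_col = 'path', y_col = 'label',
                                        target_size = (28, 28), class_mode = 'categorical', shuffle = False)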

Model Building

plain NN:

class Network:
    def __init__(self, layers = [], activations = [], dropout = [], batchnorm = [], compile_params = {}, flatten = False):
        self.model = None
        self.losses = None
        self.metrics = None
        self.layers = layers
        self.activations = activations
        self.compile_params = compile_params
        self.dropout = dropout if len(dropout) == (len(activations) - 1) else [None for _ in range(len(activations) - 1)]
        self.batchnorm = batchnorm if len(batchnorm) == (len(activations) - 1) else [None for _ in range(len(activations) - 1)]
        self.flatten = flatten
        self.initialize_model()

    def initialize_model(self):
        self.model = Sequential()

        # input layer
        self.model.add(Input(shape = self.layers[0], name = 'Input_Layer'))

        if self.flatten:
            self.model.add(Flatten())

        # hidden layers
        for idx in range(len(self.layers) - 2):
            units = self.layers[idx + 1]
            activation = self.activations[idx]
            dp = self.dropout[idx]
            bn = self.batchnorm[idx]
            self.model.add(Dense(units, activation = activation, name = f'Hidden_Layer_{idx + 1}'))
            if bn:
                self.model.add(BatchNormalization())
            if dp:
                self.model.add(Dropout(dp, name = f'Dropout_{idx + 1}_{dp}'))

        # output layer
        self.model.add(Dense(self.layers[-1], activation = self.activations[-1], name = 'Output_Layer'))

        self.model.compile(**self.compile_params)
        return self.model

    def fit(self, fit_params):
        history = self.model.fit(**fit_params)
        return self.store_history(history)

    def store_history(self, history):
        his = pd.DataFrame(history.history)
        l = [c for c in his.columns if 'loss' in c]
        m = [c for c in his.columns if 'loss' not in c]
        self.losses = his[l]
        self.metrics = his[m]
        return his

    def fit_generator(self, fit_params):
        history = self.model.fit_generator(**fit_params)
        return self.store_history(history)

    def predict(self, x, softmax = 0):
        pred = self.model.predict(x)
        if softmax == 0:
            return np.argmax(pred, axis = 1)
        return pred

    def predict_generator(self, xgen, softmax = 0):
        pred = self.model.predict_generator(xgen)
        if softmax == 0:
            return np.argmax(pred, axis = 1)
        return pred

    def plot_arch(self):
        if self.model is not None:
            return plot_model(self.model, show_shapes = True, show_layer_names = True)

    def plot_loss(self):
        if self.losses is not None:
            self.losses.plot(kind = 'line')
            plt.title('loss comparison')
            plt.legend(self.losses.columns)

    def plot_metrics(self):
        if self.metrics is not None:
            self.metrics.plot(kind = 'line')
            plt.title('metrics comparison')
            plt.legend(self.metrics.columns)


def report(yt, pred, inverse_to_cat = True):
    if inverse_to_cat:
        yt = np.argmax(yt, axis = 1)
    print(classification_report(yt, pred))
    sns.heatmap(confusion_matrix(yt, pred), fmt = '.1f', annot = True)
    plt.title('confusion matrix')

def hardmax(y):
    return np.argmax(y, axis = 1)

def sample(x, y, frac, return_val = False):
    x, xt, y, yt = train_test_split(x, y, test_size = frac, stratify = y)
    if return_val:
        return x, xt, y, yt
    return x, y

def plot(history, kind):
    his = pd.DataFrame(history.history)
    l = [c for c in his.columns if 'loss' in c]
    m = [c for c in his.columns if 'loss' not in c]
    losses = his[l]
    metrics = his[m]
    if losses is not None and kind == 'loss':
        losses.plot(kind = 'line')
        plt.title('loss comparison')
        plt.legend(losses.columns)
    if metrics is not None and kind == 'metrics':
        metrics.plot(kind = 'line')
        plt.title('metrics comparison')
        plt.legend(metrics.columns)
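The report helper is not actually called in this excerpt. As a hedged usage sketch (assuming model1 is the trained Network instance built below, and that x_dev_gen was created with shuffle = False so its .classes attribute lines up with the predictions), it would be used like this after training:

pred = model1.predict_generator(x_dev_gen)       # class indices, since softmax = 0
report(to_categorical(x_dev_gen.classes), pred)  # prints the report and plots the confusion matrix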

Model 1 (plain NN):

layers = [(28, 28, 3), 128, 3]
activations = ['leaky_relu', 'sigmoid']
dropout = []
batchNorm = []

compile_params = {
    'loss' : 'categorical_crossentropy',
    'optimizer' : keras.optimizers.RMSprop(learning_rate = 0.01),
    'metrics' : ['accuracy']
}

model1 = Network(layers, activations, dropout = dropout, batchnorm = batchNorm, flatten = True, compile_params = compile_params)
model1.plot_arch()

class myCb(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs={}):
        if logs.get("val_accuracy") >= 0.95:
            print("Reached 95% accuracy so cancelling training!")
            self.model.stop_training = True

callbacks = myCb()

fit_params = {
    'generator' : x_train_gen,
    'validation_data' : x_dev_gen,
    'epochs' : 10,
    'callbacks': [callbacks]
}

his = model1.fit_generator(fit_params)
model1.plot_metrics()

You can see that the validation accuracy stays the same at every epoch, which means the model is not generalizing well. In this case, our Model 1 (plain NN) has high bias and high variance.

CNN:

from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, ReLU

def Cnn():
    ## input layer
    ip = Input(shape = (28, 28, 3))

    ## convolution layer 1
    c1 = Conv2D(filters = 32, kernel_size = (3,3), strides = 1, padding = 'same')(ip)
    a1 = ReLU()(c1)
    mp1 = MaxPool2D(pool_size = (2,2), strides = (2,2))(a1)

    ## convolution layer 2
    c2 = Conv2D(filters = 64, kernel_size = (3,3), strides = 1, padding = 'same')(mp1)
    a2 = ReLU()(c2)
    mp2 = MaxPool2D(pool_size = (2,2), strides = (2,2))(a2)

    ## flatten
    F = Flatten()(mp2)

    ## dropout (note: this layer is created but never connected to the graph,
    ## so it has no effect and does not appear in the model summary below)
    dp = Dropout(0.2)

    ## hidden dense layer 3
    h3 = Dense(256, activation = 'relu')(F)

    ## output dense layer 4
    op = Dense(3, activation = 'sigmoid')(h3)

    model = keras.Model(inputs = ip, outputs = op)
    return model

Model 2 (CNN):

model2 = Cnn()
model2.compile(
    loss = 'categorical_crossentropy',
    optimizer = keras.optimizers.RMSprop(learning_rate = 0.001),
    metrics = ['accuracy']
)
model2.summary()

Model: "model"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
input_1 (InputLayer) [(None, 28, 28, 3)] 0
_________________________________________________________________
conv2d (Conv2D) (None, 28, 28, 32) 896
_________________________________________________________________
re_lu (ReLU) (None, 28, 28, 32) 0
_________________________________________________________________
max_pooling2d (MaxPooling2D) (None, 14, 14, 32) 0
_________________________________________________________________
conv2d_1 (Conv2D) (None, 14, 14, 64) 18496
_________________________________________________________________
re_lu_1 (ReLU) (None, 14, 14, 64) 0
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 7, 7, 64) 0
_________________________________________________________________
flatten_1 (Flatten) (None, 3136) 0
_________________________________________________________________
dense (Dense) (None, 256) 803072
_________________________________________________________________
dense_1 (Dense) (None, 3) 771
=================================================================
Total params: 823,235
Trainable params: 823,235
Non-trainable params: 0
_________________________________________________________________

The above is the model summary.

fit_params = {
    'generator' : x_train_gen,
    'validation_data' : x_dev_gen,
    'epochs' : 100,
    'callbacks': [callbacks]
}

his = model2.fit_generator(**fit_params)
plot(his, 'metrics')

As you can see, the CNN model did much better with a small number of images compared to the plain NN.
