Learning CNN (with Image Data) using Simple PYTHON Programs

Published in

Analytics Vidhya

6 min readApr 7, 2018

[Edited & revised on July, 2023]

Here I shall try to share my experience while I was learning CNN. I have put simple small examples (codes) to get understood quickly. Python (≥3.6) & Tensorflow (≥2.3) are used. Jupyter notebook is necessary to run these examples. What’s more? Run the codes & have fun!

1. Handwriting Recognition

Here MNIST dataset is getting downloaded. After training & validating the model, performance is getting estimated using test data. GPU/ higher-RAM is required to run the code. Internet connection is also required.

#importing libraries
import numpy
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras import utils
from tensorflow.keras import backend as K
from random import *#loading MNIST data & reshaping
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1).astype('float32')
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1).astype('float32')#data pre-processing
X_train = X_train / 255
X_test = X_test / 255
y_train = utils.to_categorical(y_train)
y_test = utils.to_categorical(y_test)
num_classes = y_test.shape[1]#function for creating deep network model
def create_model():
 model = Sequential()
 model.add(Conv2D(32, (3, 3), input_shape=(28, 28,1), activation='relu'))
 model.add(MaxPooling2D(pool_size=(2, 2)))
 model.add(Dropout(0.2))
 model.add(Flatten())
 model.add(Dense(128, activation='relu'))
 model.add(Dense(num_classes, activation='softmax'))
 model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
 return model#training, validating & testing
model = create_model()
model.summary()
model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=200, verbose=1)
scores = model.evaluate(X_test, y_test, verbose=1)
print("CNN Error: %.2f%%" % (100-scores[1]*100))

2. Object Recognition

Here VGG16 network pre-trained with IMAGENET dataset is used to recognize an object (real life common object). GPU is not required. Internet connection is required.

#importing libraries
import numpy as np
from IPython.display import Image, display
from tensorflow.keras.applications import VGG16, imagenet_utils
from tensorflow.keras.preprocessing.image import img_to_array, load_img#pre-processing input
inputShape = (224, 224)
preprocess = imagenet_utils.preprocess_input#loading VGG16 with 'imagenet' pre-trained weights
model = VGG16(weights="imagenet")#displaying, loading & pre-processing test image (one needs to give path for his test image)
display(Image('./test.jpg'))
image = load_img("./test.jpg", target_size=inputShape)
image = img_to_array(image)
image = np.expand_dims(image, axis=0)
image = preprocess(image)#predicting the output
preds = model.predict(image)
P = imagenet_utils.decode_predictions(preds)
for (i, (imagenetID, label, prob)) in enumerate(P[0]):
 print("{}. {}: {:.2f}%".format(i + 1, label, prob * 100))

3. Single Object Detection (with Bounding Box)

Here dataset is getting created. Each image contains a rectangle as the object. A simple Neural Network is used. GPU/ Internet is not required.

#importing libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib#creating database
num_imgs = 1000
img_size = 8
min_object_size = 1
max_object_size = 4
num_objects = 1
bboxes = np.zeros((num_imgs, num_objects, 4))
imgs = np.zeros((num_imgs, img_size, img_size))  # set background to 0
for i_img in range(num_imgs):
    for i_object in range(num_objects):
        w, h = np.random.randint(min_object_size, max_object_size, size=2)
        x = np.random.randint(0, img_size - w)
        y = np.random.randint(0, img_size - h)
        imgs[i_img, x:x+w, y:y+h] = 1.  # set rectangle to 1
        bboxes[i_img, i_object] = [x, y, w, h]
        
imgs.shape, bboxes.shape#plotting sample data
i = 0
plt.imshow(imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
for bbox in bboxes[i]:
    plt.gca().add_patch(matplotlib.patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], ec='r', fc='none'))
    
#reshaping input
X = (imgs.reshape(num_imgs, -1) - np.mean(imgs)) / np.std(imgs)
X.shape, np.mean(X), np.std(X)#reshaping output
y = bboxes.reshape(num_imgs, -1) / img_size
y.shape, np.mean(y), np.std(y)#final training & testing data
i = int(0.8 * num_imgs)
train_X = X[:i]
test_X = X[i:]
train_y = y[:i]
test_y = y[i:]
test_imgs = imgs[i:]
test_bboxes = bboxes[i:]#creating deep network model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D 
from tensorflow.keras.optimizers import SGD
model = Sequential([
        Dense(500, input_dim=X.shape[-1]),
        Activation('relu'),
        Dense(300), 
        Activation('relu'), 
        Dense(100), 
        Activation('relu'), 
        Dropout(0.2), 
        Dense(y.shape[-1])
    ])
model.compile('adadelta', 'mse')#training & validating
model.fit(train_X, train_y, epochs=100, validation_data=(test_X, test_y), verbose=2)#predicting on test data
pred_y = model.predict(test_X)
pred_bboxes = pred_y * img_size
pred_bboxes = pred_bboxes.reshape(len(pred_bboxes), num_objects, -1)
pred_bboxes.shape#plotting the prediction
plt.figure(figsize=(12, 3))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(len(test_imgs))
    plt.imshow(test_imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
    for pred_bbox, exp_bbox in zip(pred_bboxes[i], test_bboxes[i]):
        plt.gca().add_patch(matplotlib.patches.Rectangle((pred_bbox[0], pred_bbox[1]), pred_bbox[2], pred_bbox[3], ec='r', fc='none'))

4. Multiple Objects Detection (with Shapes)

Here dataset is getting created. Read the comments carefully. GPU/ Internet is not required.

# importing libraries
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
# creating dataset
# here 0-4 black objects (different shapes with random sizes) are placed in a noisy image (24 x 24). 
# the image is divided into 4 quadrants (w.r.t. image center) & each quadrant contains 0-1 object randomly.
# 4000 such images are taken.
# the objects with rectangular & lower-triangular shapes are of our interest.
# the upper-traingular shapes are dummy.
# due to randomness few images may be blank or with upper-triangular shape (dummy object) only.
# bounding boxes of the interested objects are also saved.
num_imgs = 4000
img_size = 24
min_rect_size = 3
max_rect_size = 9
max_num_objects = 5
bboxes = np.zeros((num_imgs, max_num_objects, 4))
imgs = np.random.rand(num_imgs, img_size, img_size)
shapes = np.zeros((num_imgs, max_num_objects, 1))
for i_img in range(num_imgs):
    i_object = 0
    if np.random.choice([True, False]):
        width, height = np.random.randint(min_rect_size, max_rect_size, size=2)
        x = np.random.randint(0, img_size/2 - width)
        y = np.random.randint(0, img_size/2 - height)
        imgs[i_img, x:x+width, y:y+height] = 1.
        bboxes[i_img, i_object] = [x, y, width, height]
        shapes[i_img, i_object] = [0]
        i_object += 1
    if np.random.choice([True, False]):
        size = np.random.randint(min_rect_size, max_rect_size)
        x, y = np.random.randint(img_size/2, img_size - size, size=2)
        mask = np.tril_indices(size)
        imgs[i_img, x + mask[0], y + mask[1]] = 1.
        bboxes[i_img, i_object] = [x, y, size, size]
        shapes[i_img, i_object] = [1]
        i_object += 1
    if np.random.choice([True, False]):
        width, height = np.random.randint(min_rect_size, max_rect_size, size=2)
        x = np.random.randint(img_size/2, img_size - width)
        y = np.random.randint(0, img_size/2 - height)
        imgs[i_img, x:x+width, y:y+height] = 1.
        bboxes[i_img, i_object] = [x, y, width, height]
        shapes[i_img, i_object] = [0]
        i_object += 1
    if np.random.choice([True, False]):
        size = np.random.randint(min_rect_size, max_rect_size)
        x = np.random.randint(0, img_size/2 - size)
        y = np.random.randint(img_size/2, img_size - size)
        mask = np.triu_indices(size)
        imgs[i_img, x + mask[0], y + mask[1]] = 1.
        #bboxes[i_img, i_object] = [x, y, size, size]
        #shapes[i_img, i_object] = [1]
        #i_object += 1
    for i in range(i_object, max_num_objects):
        bboxes[i_img, i] = [-1, -1, -1, -1]
        shapes[i_img, i] = [-1]
            
imgs.shape, bboxes.shape
# plotting sample input data
# see 5 randomly chosen input images. the bounding boxes of interested objects are marked red.
plt.figure(figsize=(24, 8))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(num_imgs)
    plt.imshow(imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
    for bbox, shape in zip(bboxes[i], shapes[i]):
        plt.gca().add_patch(matplotlib.patches.Rectangle((bbox[0], bbox[1]), bbox[2], bbox[3], ec='r', fc='none'))
# pre-processing data
X = (imgs.reshape(num_imgs, img_size, img_size, 1) - np.mean(imgs)) / np.std(imgs)
y = np.concatenate([bboxes / img_size, shapes], axis=-1).reshape(num_imgs, -1)
X.shape, y.shape
# final training & testing data
i = int(0.8 * num_imgs)
train_X = X[:i]
test_X = X[i:]
train_y = y[:i]
test_y = y[i:]
test_imgs = imgs[i:]
test_bboxes = bboxes[i:]
# creating deep network model
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Dropout, Convolution2D, MaxPooling2D, Flatten
from tensorflow.keras.optimizers import SGD
model = Sequential([
        Convolution2D(8, (3, 3), activation='relu', input_shape=(24, 24, 1)),
        Convolution2D(8, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Convolution2D(8, (3, 3), activation='relu'),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(3000),
        Activation('relu'),
        Dropout(0.3),
        Dense(1500), 
        Activation('relu'), 
        Dense(500), 
        Activation('relu'),
        Dropout(0.3),
        Dense(50),
        Activation('relu'),
        Dense(y.shape[-1])
    ])
model.compile('adadelta', 'mse')
# training the model & validating
model.fit(train_X, train_y, epochs=100, validation_data=(test_X, test_y), verbose=2)
# predicting on test data
pred_y = model.predict(test_X)
pred_y = pred_y.reshape(len(pred_y), max_num_objects, -1)
pred_bboxes = pred_y[..., :4] * img_size
pred_shapes = pred_y[..., 4:5]
pred_bboxes.shape, pred_shapes.shape
# plotting the predictions
# see 5 randomly chosen output predictions (in blue/ green shapes). 
# note that no upper-triangular shape has got predicted.
# accuracy could be improved by other Deep Models or/and by tuning the various associated parameters/ variables/ methods.
plt.figure(figsize=(24, 8))
for i_subplot in range(1, 6):
    plt.subplot(1, 5, i_subplot)
    i = np.random.randint(len(test_X))
    plt.imshow(test_imgs[i].T, cmap='Greys', interpolation='none', origin='lower', extent=[0, img_size, 0, img_size])
    for pred_bbox, pred_shape in zip(pred_bboxes[i], pred_shapes[i]):
        if pred_shape[0] <= 0.5:
            plt.gca().add_patch(matplotlib.patches.Rectangle((pred_bbox[0], pred_bbox[1]), pred_bbox[2], pred_bbox[3], fc='b', alpha=0.5))
        else:
            xy = ([[pred_bbox[0]+pred_bbox[2], pred_bbox[1]+pred_bbox[3]],
                    [pred_bbox[0]+pred_bbox[2], pred_bbox[1]],
                    [pred_bbox[0], pred_bbox[1]]])
            plt.gca().add_patch(matplotlib.patches.Polygon(xy, True, fc='g', alpha=0.5))

References:- https://towardsdatascience.com/object-detection-with-neural-networks-a4e2c46b4491

Please CLAP for the post if you like it, & also share it. Stay connected, I will add more codes soon… Thanks!

Learning CNN (with Image Data) using Simple PYTHON Programs

1. Handwriting Recognition

2. Object Recognition

3. Single Object Detection (with Bounding Box)

4. Multiple Objects Detection (with Shapes)

Written by Sujoy Kumar Goswami