Perfecting Imperfect Table Gridlines: A Step-by-Step Guide using Python OpenCV

Have you ever come across a table with incomplete or missing lines, or worse, with no gridlines at all? If so, this article is written specifically for you.

6 min readFeb 24, 2023

Here I’ll show you how to draw horizontal and vertical gridlines on a table image. When necessary (for example, if some lines are broken or missing), I’ll also show you how to remove the existing gridlines first, so that they can be redrawn cleanly using the algorithm described below.

Introduction
Goal
Remove Gridlines
Draw Vertical and Horizontal Gridlines
Crop Image According to Table Bounding Boxes
Apply all of the above
Show Image by Stages
End Notes

Introduction

Many times we have images with all sorts of tables, some with borders and grid lines, some with missing or broken parts, and some without gridlines and borders at all.

Here I will try to come up with a generic solution that hopefully will work on all of the above cases.

Throughout this article, whenever I say gridlines I am referring to all of the table’s gridlines and its borders.

Goal

My goal here is to remove all of the table gridlines and borders from the image and then redraw them in the proper locations and proper lengths artificially.

Remove Gridlines

def removeLines(old_image: np.ndarray, axis) -> np.ndarray:
    """Paint over the table gridlines running along ``axis`` with white.

    Args:
        old_image: BGR image (as returned by ``cv2.imread``). It is not
            modified; the work is done on a copy.
        axis: ``"horizontal"`` to erase horizontal lines, ``"vertical"`` to
            erase vertical lines.

    Returns:
        A copy of ``old_image`` with the detected line contours overdrawn in
        white.

    Raises:
        ValueError: If ``axis`` is neither ``"horizontal"`` nor ``"vertical"``.
    """
    # getStructuringElement takes ksize as (width, height). A wide, 1-px-tall
    # kernel only survives the opening on long horizontal runs, and a
    # 1-px-wide, tall kernel only on long vertical runs.
    if axis == "horizontal":
        kernel_size = (25, 1)
    elif axis == "vertical":
        kernel_size = (1, 25)
    else:
        raise ValueError("Axis must be either 'horizontal' or 'vertical' in order to work")
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)

    gray = cv2.cvtColor(old_image, cv2.COLOR_BGR2GRAY)
    # Inverted Otsu threshold: dark pixels (ink, lines) become foreground (255)
    thresh = cv2.threshold(gray, 0, 255, cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)[1]
    # Opening removes everything that the line-shaped kernel does not fit into,
    # leaving only the long straight strokes along the requested axis
    detected_lines = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel, iterations=2)

    contours = cv2.findContours(detected_lines, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 values in some OpenCV versions and 3 in others;
    # the contour list is the first item of a 2-tuple, the second of a 3-tuple
    contours = contours[0] if len(contours) == 2 else contours[1]

    result = old_image.copy()
    for contour in contours:
        # Overdraw the line outline in white with a 2 px stroke
        cv2.drawContours(result, [contour], -1, (255, 255, 255), 2)
    return result

Function walkthrough:

Converting the input image to grayscale
Applying Otsu’s thresholding algorithm, read more about it here
Checking if we need a horizontal or a vertical kernel
Applying a morphological opening to the thresholded image
Finding the contours
Drawing complete white lines on top of the detected gridlines contours

Draw Vertical and Horizontal Gridlines

def drawGridlines(old_image: np.ndarray) -> dict:
    """Draw full-length black gridlines through the white gaps of a table image.

    The thresholded image is collapsed to a single row (and, separately, to a
    single column). Each run of white that remains after re-thresholding is
    treated as a gap, and a 1 px black line is drawn through its centre across
    the whole image.

    Args:
        old_image: BGR image; it is left untouched, all drawing happens on copies.

    Returns:
        A dict with the keys ``'threshold'`` (binary threshold image),
        ``'vertical'`` (copy with vertical lines only), ``'horizontal'``
        (copy with horizontal lines only) and ``'full'`` (copy with both).
    """
    height, width = old_image.shape[:2]
    black = (0, 0, 0)

    # Binary mask: pixels brighter than 220 become 255, everything else 0
    grayscale = cv2.cvtColor(old_image, cv2.COLOR_BGR2GRAY)
    _, thresh = cv2.threshold(grayscale, 220, 255, cv2.THRESH_BINARY)

    vertical_img = old_image.copy()
    horizontal_img = old_image.copy()
    full_grid_image = old_image.copy()

    # ---- Vertical lines ----------------------------------------------------
    # Collapse the mask to one row, then keep only values above 254
    profile_row = cv2.resize(thresh, (width, 1), interpolation=cv2.INTER_AREA)
    _, white_columns = cv2.threshold(profile_row, 254, 255, cv2.THRESH_BINARY)
    # A 5x1 opening discards white runs narrower than the kernel
    opening_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 1))
    white_columns = cv2.morphologyEx(white_columns, cv2.MORPH_OPEN, opening_kernel)

    found_v = cv2.findContours(white_columns, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # The contour list sits at index 0 of a 2-tuple result, index 1 otherwise
    vertical_gaps = found_v[0] if len(found_v) == 2 else found_v[1]
    for gap in vertical_gaps:
        left, _, gap_width, _ = cv2.boundingRect(gap)
        x_mid = left + gap_width // 2
        for canvas in (vertical_img, full_grid_image):
            cv2.line(canvas, (x_mid, 0), (x_mid, height - 1), black, 1)

    # ---- Horizontal lines --------------------------------------------------
    # Same idea along the other axis: collapse to one column and re-threshold
    profile_column = cv2.resize(thresh, (1, height), interpolation=cv2.INTER_AREA)
    _, white_rows = cv2.threshold(profile_column, 254, 255, cv2.THRESH_BINARY)

    found_h = cv2.findContours(white_rows, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    horizontal_gaps = found_h[0] if len(found_h) == 2 else found_h[1]
    for gap in horizontal_gaps:
        _, top, _, gap_height = cv2.boundingRect(gap)
        y_mid = top + gap_height // 2
        for canvas in (horizontal_img, full_grid_image):
            cv2.line(canvas, (0, y_mid), (width - 1, y_mid), black, 1)

    return {
        'threshold': thresh,
        'vertical': vertical_img,
        'horizontal': horizontal_img,
        'full': full_grid_image,
    }

The general function walkthrough is (function has comments that explain everything):

Thresholding, processing, and getting vertical contours
Drawing vertical lines
Thresholding, processing, and getting horizontal contours
Drawing horizontal lines

Crop Image According to Table Bounding Boxes

def cropImage(old_image: np.ndarray, padding: int = 2) -> np.ndarray:
    """Crop the image to the union of the bounding boxes of its table cells.

    Args:
        old_image: BGR image containing the table with its gridlines drawn.
        padding: Number of pixels kept around the bounding box on every side
            so that the outer black border lines are not cut off. The crop is
            clamped to the image bounds.

    Returns:
        The cropped region as a slice (view) of ``old_image``. If no contour
        smaller than half the image is found, ``old_image`` is returned
        uncropped instead of an empty array.
    """
    # Get dimensions
    hh, ww = old_image.shape[:2]
    # Convert to gray
    gray = cv2.cvtColor(old_image, cv2.COLOR_BGR2GRAY)
    # Threshold
    thresh = cv2.threshold(gray, 128, 255, cv2.THRESH_BINARY)[1]
    # Crop 1 pixel and add a 1 pixel white border so the outer white region
    # is connected all the way around and forms a single large contour
    thresh = thresh[1: hh - 1, 1: ww - 1]
    thresh = cv2.copyMakeBorder(thresh, 1, 1, 1, 1, borderType=cv2.BORDER_CONSTANT, value=(255, 255, 255))
    # Get contours (the contour list is item 0 of a 2-tuple, item 1 of a 3-tuple)
    contours = cv2.findContours(thresh, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    contours = contours[0] if len(contours) == 2 else contours[1]
    # Keep only bounding boxes of contours SMALLER than half of the image
    # area, which excludes the large outer white region
    thresh_area = hh * ww / 2
    boxes = [
        cv2.boundingRect(contour)
        for contour in contours
        if cv2.contourArea(contour) < thresh_area
    ]
    if not boxes:
        # Nothing to crop to; an empty slice would be useless to callers
        return old_image
    # Inclusive min/max coordinates over all retained bounding boxes
    xmin = min(x for x, _, _, _ in boxes)
    ymin = min(y for _, y, _, _ in boxes)
    xmax = max(x + w - 1 for x, _, w, _ in boxes)
    ymax = max(y + h - 1 for _, y, _, h in boxes)
    # Draw bounding box
    # NOTE: visualization aid only; this copy is not returned
    bounding_box = old_image.copy()
    cv2.rectangle(bounding_box, (xmin, ymin), (xmax, ymax), (0, 0, 255), 2)
    # Crop old_image at the bounding box, adding `padding` pixels all around
    # to keep the black lines. xmax/ymax are inclusive while slice ends are
    # exclusive, hence the + 1.
    top = max(ymin - padding, 0)
    left = max(xmin - padding, 0)
    bottom = min(ymax + padding + 1, hh)
    right = min(xmax + padding + 1, ww)
    result = old_image[top:bottom, left:right]
    return result

Function walkthrough (read function comments to know more):

Converting the input image to grayscale
Applying a binary threshold
Ensure the outer white region
Getting the contours
Iterate over all the contours and compute the minimum and maximum values of the x and y coordinates of their bounding boxes
Draw a rectangle to represent the bounding box around the region of interest
Crop image according to the bounding box

Apply all of the above

Here I will show you an example of a main file, using all of the above functions and showing the result

import cv2
import matplotlib.pyplot as plt
from GridlinesImprovement.draw_gridlines_functions import drawGridlines
from GridlinesImprovement.cropping import cropImage
from GridlinesImprovement.remove_gridlines import removeLines


image_path = "Model Implementation/DummyDatabase/test_images/bad_grid.png"
new_image_path = "Model Implementation/DummyDatabase/test_images/image_grided.png"
# Read image. cv2.imread does not raise on a missing or unreadable file, it
# returns None, so fail here with a clear message instead of deep inside OpenCV.
original = cv2.imread(image_path)
if original is None:
    raise FileNotFoundError(f"Could not read image: {image_path}")
# Remove all gridlines (horizontal pass first, then vertical)
gridless = removeLines(removeLines(original, 'horizontal'), 'vertical')
# Draw grid lines
images_by_stage = drawGridlines(gridless.copy())
# images_by_stage: (dict)
#     'threshold': threshold image
#     'vertical': vertical grid lines image
#     'horizontal': horizontal grid lines image
#     'full': full grid lines image
# Obtain full grid image
full_image = images_by_stage['full'].copy()
# Crop image
cropped_image = cropImage(full_image.copy())
# Save new image; cv2.imwrite reports failure through its return value
if not cv2.imwrite(new_image_path, cropped_image):
    raise OSError(f"Could not write image: {new_image_path}")

Show Image by Stages

# Uses the variables produced by the main script: `original`,
# `images_by_stage` and `cropped_image`.
# OpenCV loads images in BGR channel order while matplotlib expects RGB, so
# every image is converted before being displayed.
f1, ax1 = plt.subplots(2, 2, figsize=(15, 5))
ax1[0][0].set_title('Input Image')
ax1[0][0].imshow(cv2.cvtColor(original, cv2.COLOR_BGR2RGB))
ax1[0][1].set_title('images["vertical"]')
ax1[0][1].imshow(cv2.cvtColor(images_by_stage["vertical"], cv2.COLOR_BGR2RGB))
ax1[1][0].set_title('images["horizontal"]')
ax1[1][0].imshow(cv2.cvtColor(images_by_stage["horizontal"], cv2.COLOR_BGR2RGB))
ax1[1][1].set_title('images["full"]')
ax1[1][1].imshow(cv2.cvtColor(images_by_stage["full"], cv2.COLOR_BGR2RGB))
f2, ax2 = plt.subplots(1, 1, figsize=(8, 5))
ax2.set_title('Final Image')
ax2.imshow(cv2.cvtColor(cropped_image, cv2.COLOR_BGR2RGB))
plt.show()

End Notes

This article grew out of an improvement idea I had when I needed to extract tabular data and ran into a problem: whenever a table had broken gridlines, or no gridlines at all, the extraction algorithm did not work well. So I came up with the idea of artificially drawing the table gridlines.

The order of the 3 articles that are related to each other is as follows:

Full code can be found in my GitHub repo