Auto labeling is not a dream!
The era of generative AI is here, and we need to collect more data to train new models or improve existing ones. However, all of this data needs ground-truth labels. For tasks like segmentation or object detection, we have to manually draw a bounding box or trace an outline around each target object, which is tedious and very time-consuming.
[⭐Update ⭐] Another project!
- Using this auto-labeling technique in an annotation app!
Now we can use YOLOv8 to automate all of these steps!
Let’s start!
First, install the Ultralytics package:
pip install ultralytics
If you already have YOLO V8 but it’s an older version, you can use the following command to upgrade it using pip:
pip install ultralytics --upgrade
Import libraries
import os

import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
Let’s see which classes the pretrained YOLO model can detect.
# NOTE: `model` must already be loaded (see the model-loading step further down)
# before this snippet can run.
# `names` maps each integer class id to its class name, as printed below.
names = model.model.names
print(names)
{0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane',
5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light',
10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench',
14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow',
20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack',
25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee',
30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite',
34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard',
38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork',
43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple',
48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog',
53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch',
58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv',
63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone',
68: 'microwave', 69: 'oven', 70: 'toaster', 71: 'sink', 72: 'refrigerator',
73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear',
78: 'hair drier', 79: 'toothbrush'}
There are five model sizes in YOLOv8, ranging from the smallest (n) to the largest (x).
For this exercise, I will use the smallest one, YOLOv8n, in its segmentation variant (yolov8n-seg).
# Load the YOLOv8n segmentation weights used for the rest of this walkthrough.
model = YOLO("yolov8n-seg.pt")
# Move the model to the first GPU.
# NOTE(review): presumably this fails on a machine without CUDA — confirm before running on CPU.
model.to('cuda:0') # to gpu 0
Load video
# Open the input video; frames are pulled from `cap` one at a time in the prediction loop.
cap = cv2.VideoCapture("cars.mp4")
In this exercise, I used this video. You can also download it from the link below.
Resize the video
# Target frame size in pixels; every frame is resized to
# (output_width, output_height) before it is passed to the model.
output_width = 1080
output_height = 600
Choose the object you want to segment.
# Labels to segment and count; each entry must match a class name in `names`
# exactly, because detections are filtered with `det_label in objects_of_interest`.
objects_of_interest = ['car'] #<- which object do you want to segment
# Tally of detections per label of interest, starting at zero.
object_counts = {obj: 0 for obj in objects_of_interest}
Predict
while True:
    # Grab the next frame; `ret` is falsy once no further frame can be read.
    ret, frame = cap.read()
    if not ret:
        print("Video frame is empty or video processing has been successfully completed.")
        break

    # Bring every frame to the fixed working size before inference.
    frame = cv2.resize(frame, (output_width, output_height))

    # conf => confidence score threshold; device='0' selects GPU 0.
    results = model.predict(frame, device='0', conf=0.25)
Data annotator
# result : is the predict result using yolo v8.
if results[0].masks is not None:
clss = results[0].boxes.cls.cpu().tolist()
masks = results[0].masks.xy
annotator = Annotator(frame, line_width=2)
for idx, (mask, cls) in enumerate(zip(masks, clss)):
det_label = names[int(cls)]
# if det_label is the object we want to detect
if det_label in objects_of_interest:
# if you don't have seg_bbox function,
# then you need to upgrade your Yolo v8.
annotator.seg_bbox(mask=mask,
mask_color=colors(int(cls), True),
det_label=det_label)
# Count objects
object_counts[det_label] += 1
What does each result object contain?
boxes: ultralytics.engine.results.Boxes object
keypoints: None
masks: ultralytics.engine.results.Masks object
names: {0: 'person', 1: 'bicycle', 2: 'car', 3: 'motorcycle', 4: 'airplane', 5: 'bus', 6: 'train', 7: 'truck', 8: 'boat', 9: 'traffic light', 10: 'fire hydrant', 11: 'stop sign', 12: 'parking meter', 13: 'bench', 14: 'bird', 15: 'cat', 16: 'dog', 17: 'horse', 18: 'sheep', 19: 'cow', 20: 'elephant', 21: 'bear', 22: 'zebra', 23: 'giraffe', 24: 'backpack', 25: 'umbrella', 26: 'handbag', 27: 'tie', 28: 'suitcase', 29: 'frisbee', 30: 'skis', 31: 'snowboard', 32: 'sports ball', 33: 'kite', 34: 'baseball bat', 35: 'baseball glove', 36: 'skateboard', 37: 'surfboard', 38: 'tennis racket', 39: 'bottle', 40: 'wine glass', 41: 'cup', 42: 'fork', 43: 'knife', 44: 'spoon', 45: 'bowl', 46: 'banana', 47: 'apple', 48: 'sandwich', 49: 'orange', 50: 'broccoli', 51: 'carrot', 52: 'hot dog', 53: 'pizza', 54: 'donut', 55: 'cake', 56: 'chair', 57: 'couch', 58: 'potted plant', 59: 'bed', 60: 'dining table', 61: 'toilet', 62: 'tv', 63: 'laptop', 64: 'mouse', 65: 'remote', 66: 'keyboard', 67: 'cell phone', 68: 'microwave', 69: 'oven', 70: 'toaster', 71:
'sink', 72: 'refrigerator', 73: 'book', 74: 'clock', 75: 'vase', 76: 'scissors', 77: 'teddy bear', 78: 'hair drier', 79: 'toothbrush'}
obb: None
orig_img: array([[[52, 48, 38],
[55, 51, 41],
[60, 54, 44],
...,
[29, 26, 25],
[29, 26, 25],
[30, 27, 26]],
[[58, 54, 44],
[56, 52, 42],
[59, 53, 43],
...,
[32, 29, 28],
[32, 29, 28],
[32, 29, 28]],
[[54, 50, 40],
[54, 50, 40],
[60, 54, 44],
...,
[30, 27, 26],
[30, 27, 26],
[30, 27, 26]],
...,
[[39, 30, 34],
[40, 31, 35],
[40, 32, 34],
...,
[52, 42, 42],
[52, 43, 40],
[52, 43, 40]],
[[36, 30, 33],
[37, 31, 34],
[41, 36, 37],
...,
[49, 41, 43],
[51, 41, 41],
[51, 41, 41]],
[[37, 32, 31],
[35, 30, 29],
[34, 29, 28],
...,
[50, 42, 44],
[52, 42, 42],
[52, 42, 42]]], dtype=uint8)
orig_shape: (600, 1080)
path: 'image0.jpg'
probs: None
save_dir: None
speed: {'preprocess': 1.0001659393310547, 'inference': 4.999876022338867, 'postprocess': 0.9996891021728516}]
Object count + visualize
text_y = 30
for obj, count in object_counts.items():
cv2.putText(frame, f"{obj.capitalize()} Count: {count}", (10, text_y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
text_y += 40
# out.write(frame)
cv2.imshow("instance-segmentation", frame)
if cv2.waitKey(1) & 0xFF == ord('q'):
break
# out.release()
cap.release()
cv2.destroyAllWindows()
Full code
import cv2
from ultralytics import YOLO
from ultralytics.utils.plotting import Annotator, colors
import os
# Load the YOLOv8n segmentation weights and move the model to the GPU.
model = YOLO("yolov8n-seg.pt")
model.to('cuda')
names = model.model.names  # integer class id -> class name

cap = cv2.VideoCapture("cars.mp4")
if not cap.isOpened():
    # Fail with a clear message instead of falling into the loop and
    # reporting that processing "completed" when nothing was ever read.
    raise SystemExit("Could not open video file: cars.mp4")

output_folder = "output_instances"
os.makedirs(output_folder, exist_ok=True)

# Every frame is resized to this size before prediction and display.
output_width = 1080
output_height = 600

# The writer's frame size must match the resized frames passed to out.write().
# out = cv2.VideoWriter('instance-segmentation.avi',
#                       cv2.VideoWriter_fourcc(*'MJPG'),
#                       30, (output_width, output_height))

# Labels to segment and count; each must match a class name in `names`.
objects_of_interest = ['car']
object_counts = {obj: 0 for obj in objects_of_interest}

try:
    while True:
        ret, frame = cap.read()
        if not ret:
            print("Video frame is empty or video processing has been successfully completed.")
            break

        frame = cv2.resize(frame, (output_width, output_height))
        results = model.predict(frame, device='0', conf=0.25)

        # Reset the tally so the overlay reports the objects visible in the
        # current frame, not a total that grows with every processed frame.
        for obj in object_counts:
            object_counts[obj] = 0

        # `masks` is None when nothing was segmented in this frame.
        if results[0].masks is not None:
            clss = results[0].boxes.cls.cpu().tolist()
            masks = results[0].masks.xy
            annotator = Annotator(frame, line_width=2)  # draws onto `frame`

            # `idx` is only needed by the optional instance-saving code below.
            for idx, (mask, cls) in enumerate(zip(masks, clss)):
                det_label = names[int(cls)]
                if det_label not in objects_of_interest:
                    continue

                annotator.seg_bbox(mask=mask,
                                   mask_color=colors(int(cls), True),
                                   det_label=det_label)
                object_counts[det_label] += 1

                # Save each instance segmented object
                # instance_folder = os.path.join(output_folder, det_label)
                # os.makedirs(instance_folder, exist_ok=True)
                # instance_path = os.path.join(instance_folder, f"{det_label}_{idx}.png")
                # cv2.imwrite(instance_path, frame)

        # Draw one "<Label> Count: <n>" line per label, stacked 40 px apart.
        text_y = 30
        for obj, count in object_counts.items():
            cv2.putText(frame, f"{obj.capitalize()} Count: {count}", (10, text_y),
                        cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255), 2)
            text_y += 40

        # out.write(frame)
        cv2.imshow("instance-segmentation", frame)
        # Press 'q' in the preview window to stop early.
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always release the capture and close the window, even if a frame fails.
    # out.release()
    cap.release()
    cv2.destroyAllWindows()
⭐ If you want to save the segmentation results (the output video and the
per-instance images), uncomment the commented-out lines in the code above.
If you only want a quick on-screen demonstration of segmentation, leave the comments
as they are and run the code. ⭐