Skip to content

Commit

Permalink
added video object detector with opencv
Browse files Browse the repository at this point in the history
  • Loading branch information
oluwaseun authored and Chilicyy committed Nov 4, 2022
1 parent 7501915 commit 0828aac
Showing 1 changed file with 131 additions and 0 deletions.
131 changes: 131 additions & 0 deletions deploy/ONNX/OpenCV/yolo_video.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
# usage: python yolo_video.py --model "./yolov6n.onnx" --source 0

import cv2
import numpy as np
import argparse

INPUT_WIDTH = 640
INPUT_HEIGHT = 640
SCORE_THRESHOLD = 0.5
NMS_THRESHOLD = 0.45
CONFIDENCE_THRESHOLD = 0.2

# Text parameters.
FONT_FACE = cv2.FONT_HERSHEY_SIMPLEX
FONT_SCALE = 0.7
THICKNESS = 1

# Colors.
BLACK = (0,0,0)
BLUE = (255,178,50)
YELLOW = (0,255,255)

def draw_label(im, label, x, y):
"""Draw text onto image at location."""

# Get text size.
text_size = cv2.getTextSize(label, FONT_FACE, FONT_SCALE, THICKNESS)
dim, baseline = text_size[0], text_size[1]
# Use text size to create a BLACK rectangle.
cv2.rectangle(im, (x,y), (x + dim[0], y + dim[1] + baseline), (0,0,0), cv2.FILLED);
# Display text inside the rectangle.
cv2.putText(im, label, (x, y + dim[1]), FONT_FACE, FONT_SCALE, YELLOW, THICKNESS, cv2.LINE_AA)

def pre_process(input_image, net):
# Create a 4D blob from a frame.
blob = cv2.dnn.blobFromImage(input_image, 1/255, (INPUT_WIDTH, INPUT_HEIGHT), [0,0,0], 1, crop=False)

# Sets the input to the network.
net.setInput(blob)

# Run the forward pass to get output of the output layers.
outputs = net.forward(net.getUnconnectedOutLayersNames())
return outputs

def post_process(input_image, outputs):
# Lists to hold respective values while unwrapping.
class_ids = []
confidences = []
boxes = []
# Rows.
rows = outputs[0].shape[1]
image_height, image_width = input_image.shape[:2]
# Resizing factor.
x_factor = image_width / INPUT_WIDTH
y_factor = image_height / INPUT_HEIGHT
# Iterate through detections.
for r in range(rows):
row = outputs[0][0][r]
confidence = row[4]
# Discard bad detections and continue.
if confidence >= CONFIDENCE_THRESHOLD:
classes_scores = row[5:]
# Get the index of max class score.
class_id = np.argmax(classes_scores)
# Continue if the class score is above threshold.
if (classes_scores[class_id] > SCORE_THRESHOLD):
confidences.append(confidence)
class_ids.append(class_id)
cx, cy, w, h = row[0], row[1], row[2], row[3]
left = int((cx - w/2) * x_factor)
top = int((cy - h/2) * y_factor)
width = int(w * x_factor)
height = int(h * y_factor)
box = np.array([left, top, width, height])
boxes.append(box)

# Perform non maximum suppression to eliminate redundant, overlapping boxes with lower confidences.
indices = cv2.dnn.NMSBoxes(boxes, confidences, CONFIDENCE_THRESHOLD, NMS_THRESHOLD)
for i in indices:
box = boxes[i]
left = box[0]
top = box[1]
width = box[2]
height = box[3]
# Draw bounding box.
cv2.rectangle(input_image, (left, top), (left + width, top + height), BLUE, 3*THICKNESS)
# Class label.
label = "{}:{:.2f}".format(classes[class_ids[i]], confidences[i])
# Draw label.
draw_label(input_image, label, left, top)
return input_image


def video():
while True :

# get frame from the video
ret, frame = cap.read()
net = cv2.dnn.readNet(modelWeights)
# Process image.
detections = pre_process(frame, net)
img = post_process(frame.copy(), detections)
"""
Put efficiency information. The function getPerfProfile returns the overall time for inference(t)
and the timings for each of the layers(in layersTimes).
"""
t, _ = net.getPerfProfile()
label = 'Inference time: %.2f ms' % (t * 1000.0 / cv2.getTickFrequency())
# print(label)
cv2.putText(img, label, (20, 40), FONT_FACE, FONT_SCALE, (0, 0, 255), THICKNESS, cv2.LINE_AA)
cv2.imshow('Output', img)

if cv2.waitKey(30) & 0xFF == ord('q'):
break


if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--model', default='models/yolov6n.onnx', help="Input your onnx model.")
parser.add_argument('--source', default=0, type=int, help="video source - 0,1,2 ...")
parser.add_argument('--classesFile', default='coco.names', help="Path to your classesFile.")
args = parser.parse_args()

modelWeights, video_source, classesFile = args.model, args.source, args.classesFile
cap = cv2.VideoCapture(video_source)
classes = None
with open(classesFile, 'rt') as f:
classes = f.read().rstrip('\n').split('\n')

video()

0 comments on commit 0828aac

Please sign in to comment.