Install the OpenCV dependency: pip install opencv-python
Usage example: python object_detection.py --input object_detection.jpg --model v2 --threshold 0.9
Code:
# Object detection with PyTorch
# Source : https://debuggercafe.com/object-detection-using-pytorch-faster-rcnn-resnet50-fpn-v2/
# Usage example : python object_detection.py --input object_detection.jpg --model v2 --threshold 0.9
import torchvision.transforms as transforms
import cv2
import numpy as np
import torch
# from coco_names import COCO_INSTANCE_CATEGORY_NAMES as coco_names
coco_names = [
'__background__', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant', 'N/A', 'stop sign',
'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
'elephant', 'bear', 'zebra', 'giraffe', 'N/A', 'backpack', 'umbrella', 'N/A', 'N/A',
'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket',
'bottle', 'N/A', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl',
'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', 'N/A', 'dining table',
'N/A', 'N/A', 'toilet', 'N/A', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'N/A', 'book',
'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'
]
np.random.seed(42)
# Create different colors for each class.
COLORS = np.random.uniform(0, 255, size=(len(coco_names), 3))
# Preprocessing pipeline applied to every input image before inference:
# a Compose wrapper holding a single ToTensor step.
transform = transforms.Compose([transforms.ToTensor()])
def predict(image, model, device, detection_threshold):
    """
    Run one forward pass and keep the detections at or above a score threshold.

    Args:
        image: Input accepted by the module-level ``transform``.
        model: Detection model called on a batch of one image; its output is
            read as a list whose first element is a dict with the keys
            'scores', 'boxes' and 'labels'.
        device: Device the image tensor is moved to before inference.
        detection_threshold: Minimum score (inclusive) for a detection to
            be kept.

    Returns:
        A tuple ``(boxes, pred_classes, labels, high_scores)`` where
        ``boxes`` is an int32 NumPy array of the kept boxes,
        ``pred_classes`` is the list of ``coco_names`` entries for the kept
        labels, ``labels`` is the tensor of kept label ids, and
        ``high_scores`` is the NumPy array of kept scores.
    """
    # Transform the image to a tensor and add a batch dimension.
    image = transform(image).to(device)
    image = image.unsqueeze(0)
    # Get the predictions on the image.
    with torch.no_grad():
        outputs = model(image)
    detections = outputs[0]
    pred_scores = detections['scores'].detach().cpu().numpy()
    pred_bboxes = detections['boxes'].detach().cpu().numpy()
    # Compute the threshold mask once and reuse it for every output.
    keep = pred_scores >= detection_threshold
    boxes = pred_bboxes[keep].astype(np.int32)
    high_scores = pred_scores[keep]
    # Select labels with the same mask as boxes and scores so the three stay
    # aligned even when detections are not sorted by descending score
    # (slicing the first len(boxes) labels would silently mismatch then).
    all_labels = detections['labels']
    labels = all_labels[torch.as_tensor(keep, device=all_labels.device)]
    # Get all the predicted class names.
    pred_classes = [coco_names[i] for i in labels.cpu().numpy()]
    print(pred_classes)
    print(high_scores)
    return boxes, pred_classes, labels, high_scores
def draw_boxes(boxes, classes, labels, scores, image):
    """
    Draw a rectangle and a "class:score" caption for every detection.

    ``boxes[i]`` supplies the corner coordinates, ``classes[i]`` and
    ``scores[i]`` the caption, and ``labels[i]`` picks the color from the
    module-level ``COLORS`` table. Drawing is done with ``cv2.rectangle``
    and ``cv2.putText`` on ``image``, which is then returned.
    """
    # Stroke width grows with the image dimensions but is never below 2.
    line_width = max(round(sum(image.shape) / 2 * 0.003), 2)
    # Caption strokes are one step thinner than the box, with a floor of 1.
    font_thickness = max(line_width - 1, 1)
    font_scale = line_width / 3

    for idx, box in enumerate(boxes):
        # Per-class color with its channel order reversed.
        draw_color = COLORS[labels[idx]][::-1]
        top_left = (int(box[0]), int(box[1]))
        bottom_right = (int(box[2]), int(box[3]))
        # Score rounded to three decimals.
        shown_score = round(1000 * scores[idx]) / 1000.0
        caption = classes[idx] + ":" + str(shown_score)
        # The caption sits 5 pixels above the box's top-left corner.
        caption_origin = (int(box[0]), int(box[1] - 5))

        cv2.rectangle(
            img=image,
            pt1=top_left,
            pt2=bottom_right,
            color=draw_color,
            thickness=line_width,
        )
        cv2.putText(
            img=image,
            text=caption,
            org=caption_origin,
            fontFace=cv2.FONT_HERSHEY_SIMPLEX,
            fontScale=font_scale,
            color=draw_color,
            thickness=font_thickness,
            lineType=cv2.LINE_AA,
        )
    return image
import torchvision
def get_model(device='cpu', model_name='v2'):
    """
    Build a Faster R-CNN ResNet50-FPN detector with its default weights.

    Args:
        device: Device the model is moved to.
        model_name: 'v1' selects ``fasterrcnn_resnet50_fpn`` and 'v2'
            selects ``fasterrcnn_resnet50_fpn_v2``.

    Returns:
        The model in eval mode, placed on ``device``.

    Raises:
        ValueError: If ``model_name`` is neither 'v1' nor 'v2'.
    """
    builder_names = {
        'v1': 'fasterrcnn_resnet50_fpn',
        'v2': 'fasterrcnn_resnet50_fpn_v2',
    }
    # Reject unknown names up front; otherwise no branch would bind a model
    # and the failure would surface later as an UnboundLocalError.
    if model_name not in builder_names:
        raise ValueError(
            f"Unknown model_name {model_name!r}; "
            f"expected one of {sorted(builder_names)}"
        )
    build = getattr(torchvision.models.detection, builder_names[model_name])
    model = build(weights='DEFAULT')
    # Switch to inference mode and load onto the computation device.
    return model.eval().to(device)
# import torch
import argparse
import cv2
# import detect_utils
# import numpy as np
from PIL import Image
# from model import get_model
# Construct the argument parser.
from pathlib import Path

# Construct the argument parser.
parser = argparse.ArgumentParser()
parser.add_argument(
    '-i', '--input', default='input/image_1.jpg',
    help='path to the input image'
)
parser.add_argument(
    '-t', '--threshold', default=0.5, type=float,
    help='detection threshold'
)
parser.add_argument(
    '-m', '--model', default='v2',
    help='faster rcnn resnet50 fpn or fpn v2',
    choices=['v1', 'v2']
)
args = vars(parser.parse_args())
# Define the computation device.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = get_model(device, args['model'])
# Read the image; the context manager closes the underlying file once the
# RGB copy has been made.
with Image.open(args['input']) as source_image:
    image = source_image.convert('RGB')
# Create a BGR copy of the image for annotation.
image_bgr = cv2.cvtColor(np.array(image), cv2.COLOR_RGB2BGR)
# Detect outputs.
with torch.no_grad():
    boxes, classes, labels, scores = predict(image, model, device, args['threshold'])
# Draw bounding boxes.
image = draw_boxes(boxes, classes, labels, scores, image_bgr)
# Build the output name from the input's stem so that any path separator and
# file names containing extra dots are handled; the threshold is written
# without its decimal point (0.9 -> "09").
threshold_tag = str(args['threshold']).replace('.', '')
save_name = f"{Path(args['input']).stem}_t{threshold_tag}_{args['model']}"
# Save before showing so the result is still written when no display is
# available.
cv2.imwrite(f"{save_name}.jpg", image)
cv2.imshow('Image', image)
cv2.waitKey(0)
cv2.destroyAllWindows()
Input image:
Output: