The code comes from https://gluon-cv.mxnet.io/build/examples_detection/demo_webcam.html. I have changed it to run detection on a video file instead of a webcam.
The point is that I’m using the pre-trained model yolo3_mobilenet1.0_coco. According to https://gluon-cv.mxnet.io/model_zoo/detection.html, it should give the highest FPS. But when I run it on a GPU (a GeForce RTX 2080 Ti), I’m getting only 2 to 3 FPS.
Is the code below correct? Do I need to make any modifications?
import time
import cv2
import gluoncv as gcv
import mxnet as mx
# Run YOLOv3 (MobileNet1.0 backbone, COCO weights) on every frame of a video
# file, draw the detections plus the measured FPS, and write the result to an
# AVI file.

# Use the first GPU when MXNet can see one, otherwise fall back to the CPU.
ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu(0)
net = gcv.model_zoo.get_model('yolo3_mobilenet1.0_coco', pretrained=True, ctx=ctx)
# Compile the network for faster inference, as the upstream GluonCV demo does.
net.hybridize()

# Open the input video file.
cap = cv2.VideoCapture("traffic.mp4")
if not cap.isOpened():
    raise IOError("Could not open video file: traffic.mp4")

fourcc = cv2.VideoWriter_fourcc(*'XVID')
out = cv2.VideoWriter('output1.avi', fourcc, 20.0, (512, 512))

# Style of the FPS overlay; constant for the whole run, so defined once.
font = cv2.FONT_HERSHEY_SIMPLEX
bottom_left_corner_of_text = (10, 500)
font_scale = 1
font_color = (255, 0, 0)
thickness = 1

try:
    while True:
        start_time = time.time()

        # Load the next frame; `ret` is False once the video is exhausted
        # (or a frame cannot be decoded), in which case `frame` is None.
        ret, frame = cap.read()
        if not ret:
            break

        # Image pre-processing.
        # OpenCV decodes frames as BGR, while the network expects RGB.
        rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Build the NDArray directly as uint8 to avoid a float32 round trip.
        frame_nd = mx.nd.array(rgb_frame, dtype='uint8')
        rgb_nd, frame = gcv.data.transforms.presets.yolo.transform_test(
            frame_nd, short=512, max_size=700)

        # Run frame through network.
        rgb_nd = rgb_nd.as_in_context(ctx)
        class_IDs, scores, bounding_boxes = net(rgb_nd)

        # Draw the detections on the (RGB) frame returned by transform_test.
        img = gcv.utils.viz.cv_plot_bbox(
            frame, bounding_boxes[0], scores[0], class_IDs[0],
            class_names=net.classes)

        # VideoWriter expects BGR frames of exactly the size it was opened with.
        img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)
        x = cv2.resize(img, dsize=(512, 512))

        # Measure once so the printed and the overlaid value agree.
        elapsed = time.time() - start_time
        fps = 1.0 / elapsed if elapsed > 0 else float('inf')
        print("FPS: ", fps)
        cv2.putText(x, str(fps), bottom_left_corner_of_text, font,
                    font_scale, font_color, thickness)

        # cv2.imshow('img', x)
        out.write(x)

        # Only has an effect when a window is shown (see imshow above):
        # any key press stops the processing early.
        if cv2.waitKey(1) >= 0:
            break
finally:
    # Always release the reader and finalize the output file, even on error.
    cap.release()
    out.release()
    cv2.destroyAllWindows()