So I don’t have access to a webcam; is it possible to pass a video through pose estimation instead? Are there any examples lying around?
Please try this code.
import time

import cv2
import mxnet as mx
import gluoncv
from gluoncv import utils
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord
from gluoncv.model_zoo import get_model
from gluoncv.utils.viz import cv_plot_image, cv_plot_keypoints

# Sample clip used in place of a webcam stream.
url = 'https://github.com/bryanyzhu/tiny-ucf101/raw/master/v_Basketball_g01_c01.avi'
video_fname = utils.download(url)

ctx = mx.cpu()
# Person detector: SSD restricted to the single 'person' class so the
# pose estimator only receives person boxes.
detector = get_model('ssd_512_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})
detector.hybridize()
# Top-down pose estimator applied to each detected person box.
estimator = get_model('alpha_pose_resnet101_v1b_coco', pretrained=True, ctx=ctx)
estimator.hybridize()

cap = cv2.VideoCapture(video_fname)
fps = cap.get(cv2.CAP_PROP_FPS)
start = time.time()
while True:
    # Seek to the frame matching elapsed wall-clock time so the display
    # stays real-time even when inference is slower than the video's fps
    # (intermediate frames are intentionally skipped).
    sec = time.time() - start
    cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec))
    ret, frame = cap.read()
    if not ret:
        break
    # OpenCV delivers BGR; the model pipeline and plot helpers expect RGB.
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs,
                                                      scores, bounding_boxs)
    if upscale_bbox is not None:
        predicted_heatmap = estimator(pose_input.as_in_context(ctx))
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs,
                                bounding_boxs, scores,
                                box_thresh=0.5, keypoint_thresh=0.2)
        cv_plot_image(img)
    else:
        # No person detected in this frame: show it unannotated.
        cv_plot_image(frame)
    if cv2.waitKey(1) == 27:  # ESC quits
        break
cap.release()
cv2.destroyAllWindows()
That worked beautifully. Another question: say if I wanted to save the video, how would I do that?
If you want to save the result to a file from your video file (not a webcam), you can use all the frames in your video file.
This is a simple example.
import cv2
import mxnet as mx
import gluoncv
from gluoncv import utils
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord
from gluoncv.model_zoo import get_model
from gluoncv.utils.viz import cv_plot_keypoints

url = 'https://github.com/bryanyzhu/tiny-ucf101/raw/master/v_Basketball_g01_c01.avi'
video_fname = utils.download(url)

cap = cv2.VideoCapture(video_fname)
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
# Define the codec and create the VideoWriter object.  VideoWriter
# silently drops any frame whose size differs from (width, height) and
# expects BGR ordering, so every frame must be converted and resized
# back before write() — otherwise the output is discolored or empty.
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))

ctx = mx.cpu()
detector = get_model('ssd_512_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})
detector.hybridize()
estimator = get_model('alpha_pose_resnet101_v1b_coco', pretrained=True, ctx=ctx)
estimator.hybridize()

# Read every frame sequentially so the output has the same length and
# timing as the input.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # BGR -> RGB for the detection/pose pipeline.
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    # transform_test resizes the frame so its short side is 240 and
    # returns an RGB numpy image alongside the network input tensor.
    x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
    x = x.as_in_context(ctx)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs,
                                                      scores, bounding_boxs)
    if upscale_bbox is not None:
        predicted_heatmap = estimator(pose_input.as_in_context(ctx))
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs,
                                bounding_boxs, scores,
                                box_thresh=0.5, keypoint_thresh=0.2)
    else:
        # No person detected: save the frame unannotated.
        img = frame
    # RGB -> BGR and back to the writer's size before saving.
    img = cv2.resize(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), (width, height))
    out.write(img)
    if cv2.waitKey(1) == 27:  # ESC aborts
        break
cap.release()
out.release()
cv2.destroyAllWindows()
Hope this helps.
With this I managed to save the video, but the problem I get is that the color is wrong, and the video length doesn’t match. The output video tends to be shorter and much faster. I fixed the color issue by converting the frame back to BGR before writing it:
# Relies on names defined earlier in the script: folder_raw,
# folder_result, video_fname, detector, estimator, context.
cap = cv2.VideoCapture(folder_raw + video_fname)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))
frame_width = int(cap.get(3))    # cv2.CAP_PROP_FRAME_WIDTH
frame_height = int(cap.get(4))   # cv2.CAP_PROP_FRAME_HEIGHT
size = (frame_width, frame_height)
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
result = cv2.VideoWriter(folder_result + 'demo_' + video_fname,
                         fourcc, fps, size)

# Process EVERY frame in order.  Do not seek with
# cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec)) based on wall-clock
# time: that skips frames whenever inference runs slower than real time,
# which made the saved video shorter and faster than the original.
while True:
    ret, frame = cap.read()
    if not ret:
        break
    # BGR -> RGB for the detection/pose pipeline.
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
    x = x.as_in_context(context)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(frame,
                                                      class_IDs,
                                                      scores,
                                                      bounding_boxs)
    if upscale_bbox is not None:
        predicted_heatmap = estimator(pose_input.as_in_context(context))
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        img = cv_plot_keypoints(frame, pred_coords,
                                confidence, class_IDs,
                                bounding_boxs, scores,
                                box_thresh=0.5, keypoint_thresh=0.1)
    else:
        # No person detected: save the frame unannotated.
        img = frame
    # RGB -> BGR and back to the original size so VideoWriter accepts it.
    img = cv2.resize(cv2.cvtColor(img, cv2.COLOR_RGB2BGR), size)
    result.write(img)
    if cv2.waitKey(1) == 27:  # ESC to stop
        break
cap.release()
result.release()
cv2.destroyAllWindows()
print("The video was successfully saved.")
In your code, you don’t use all the frames, but you set the fps to that of your original video.
fps = int(cap.get(cv2.CAP_PROP_FPS))
result = cv2.VideoWriter(folder_result + 'demo_' + video_fname,
fourcc, fps, size)
So the output video becomes shorter, I think.
You should use all frames in your original video.
This line is not needed.
cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec))
That helped — the output video matches better now, but it’s still not 100% correct. I sent you a message directly here; can you take a look and get back to me? Much appreciated.