How to run pose estimation/AlphaPose on a video instead of a webcam?

I don’t have access to a webcam, so is it possible to pass a video through pose estimation instead? Are there any examples lying around?

Please try this code.

import time

import cv2
import mxnet as mx
import gluoncv
from gluoncv.model_zoo import get_model
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord
from gluoncv.utils.viz import cv_plot_image, cv_plot_keypoints
from gluoncv import utils

url = 'https://github.com/bryanyzhu/tiny-ucf101/raw/master/v_Basketball_g01_c01.avi'
video_fname = utils.download(url)

ctx = mx.cpu()

# Person detector; restrict its output classes to 'person' only
detector = get_model('ssd_512_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})
detector.hybridize()

# AlphaPose keypoint estimator
estimator = get_model('alpha_pose_resnet101_v1b_coco', pretrained=True, ctx=ctx)
estimator.hybridize()

cap = cv2.VideoCapture(video_fname)
fps = cap.get(cv2.CAP_PROP_FPS)

start = time.time()
while True:
    # Seek to the frame matching elapsed wall-clock time, so playback stays
    # real-time and slow inference simply skips frames, like a webcam would
    sec = time.time() - start
    cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec))
    ret, frame = cap.read()

    if ret:
        # OpenCV delivers BGR; the GluonCV transforms expect RGB
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
        x = x.as_in_context(ctx)
        class_IDs, scores, bounding_boxs = detector(x)
        pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs, scores, bounding_boxs)

        if upscale_bbox is not None:
            predicted_heatmap = estimator(pose_input.as_in_context(ctx))
            pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
            img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                                    box_thresh=0.5, keypoint_thresh=0.2)
            cv_plot_image(img)
        else:
            # No person detected: show the unannotated frame
            cv_plot_image(frame)
    else:
        break

    if cv2.waitKey(1) == 27:  # ESC to stop
        break

cap.release()
cv2.destroyAllWindows()
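A side note on speed: everything above runs on the CPU (ctx = mx.cpu()). If you have a CUDA-enabled build of MXNet (1.3 or newer), the same pipeline runs on the GPU by changing only the context; a minimal sketch:

ctx = mx.gpu(0) if mx.context.num_gpus() > 0 else mx.cpu()  # use the first GPU if one is available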

That worked beautifully. Another question: if I wanted to save the output video, how would I do that?

If you want to save the result from a video file (not a webcam) to a new file, you can process every frame of the video instead of seeking by wall-clock time.
Here is a simple example.

import cv2
import mxnet as mx
import gluoncv
from gluoncv.model_zoo import get_model
from gluoncv.data.transforms.pose import detector_to_alpha_pose, heatmap_to_coord
from gluoncv.utils.viz import cv_plot_keypoints
from gluoncv import utils

url = 'https://github.com/bryanyzhu/tiny-ucf101/raw/master/v_Basketball_g01_c01.avi'
video_fname = utils.download(url)

cap = cv2.VideoCapture(video_fname)
fps = int(cap.get(cv2.CAP_PROP_FPS))
width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))

# Define the codec and create a VideoWriter with the source frame rate and size
fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
out = cv2.VideoWriter('output.avi', fourcc, fps, (width, height))

ctx = mx.cpu()
detector = get_model('ssd_512_mobilenet1.0_coco', pretrained=True, ctx=ctx)
detector.reset_class(classes=['person'], reuse_weights={'person': 'person'})
detector.hybridize()
estimator = get_model('alpha_pose_resnet101_v1b_coco', pretrained=True, ctx=ctx)
estimator.hybridize()

while True:
    # Read every frame in order: no seeking, so input and output stay in step
    ret, frame = cap.read()
    if ret:
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
        x = x.as_in_context(ctx)
        class_IDs, scores, bounding_boxs = detector(x)
        pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs, scores, bounding_boxs)
        if upscale_bbox is not None:
            predicted_heatmap = estimator(pose_input.as_in_context(ctx))
            pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
            img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs, bounding_boxs, scores,
                                    box_thresh=0.5, keypoint_thresh=0.2)
            out.write(img)
        else:
            out.write(frame)
    else:
        break

    if cv2.waitKey(1) == 27:  # leftover from the display version; harmless here
        break

cap.release()
out.release()
cv2.destroyAllWindows()
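One caveat worth checking on your OpenCV build (treat this as a caution, not a guarantee): transform_test resizes the frame to short=240, so the frames written above may not match the (width, height) the VideoWriter was opened with, and OpenCV tends to silently drop frames whose size doesn’t match. If the output file comes out empty or truncated, resize before writing:

img = cv2.resize(img, (width, height))  # match the size the writer was opened with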

Hope this helps.

With this I managed to save the video, but I ran into two problems: the colors are wrong, and the video length doesn’t match (the output tends to be shorter and plays much faster). I fixed the color issue by converting each frame back to BGR before writing it:

cap = cv2.VideoCapture(folder_raw + video_fname)
frame_count = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

frame_width = int(cap.get(3))   # CAP_PROP_FRAME_WIDTH
frame_height = int(cap.get(4))  # CAP_PROP_FRAME_HEIGHT
size = (frame_width, frame_height)

fourcc = cv2.VideoWriter_fourcc('M', 'J', 'P', 'G')
result = cv2.VideoWriter(folder_result + 'demo_' + video_fname,
                         fourcc, fps, size)

start = time.time()
while True:
    sec = time.time() - start
    cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec))
    ret, frame = cap.read()

    if ret:
        frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
        x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
        x = x.as_in_context(context)

        class_IDs, scores, bounding_boxs = detector(x)
        pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs,
                                                          scores, bounding_boxs)

        if upscale_bbox is not None:
            predicted_heatmap = estimator(pose_input.as_in_context(context))
            pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
            img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs,
                                    bounding_boxs, scores,
                                    box_thresh=0.5, keypoint_thresh=0.1)

            img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # back to BGR for VideoWriter
            img = cv2.resize(img, size)
            result.write(img)
        else:
            frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
            result.write(frame)
    else:
        break

    if cv2.waitKey(1) == 27:  # ESC to stop
        break

cap.release()
result.release()

cv2.destroyAllWindows()
print("The video was successfully saved.")

In your code, you don’t use all the frames, but you set the writer’s fps to that of your original video:

fps = int(cap.get(cv2.CAP_PROP_FPS))
result = cv2.VideoWriter(folder_result + 'demo_' + video_fname,
                         fourcc, fps, size)

The cap.set(...) seek skips every frame that inference can’t keep up with. For example, if inference takes 0.2 s per frame on a 25 fps source, only about one frame in five is written, and played back at 25 fps the result comes out roughly five times shorter and faster. So the output video becomes shorter, I think.
You should use all the frames of your original video.

This line is not needed:

cap.set(cv2.CAP_PROP_POS_FRAMES, round(fps * sec))
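Dropping that line, here is a minimal sketch of the corrected loop, reusing the variable names from your snippet (cap, result, size, context, detector and estimator are assumed to be set up as before); it simply reads every frame in order:

while True:
    ret, frame = cap.read()  # sequential read: every frame, in order
    if not ret:
        break
    frame = mx.nd.array(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)).astype('uint8')
    x, frame = gluoncv.data.transforms.presets.ssd.transform_test(frame, short=240)
    x = x.as_in_context(context)
    class_IDs, scores, bounding_boxs = detector(x)
    pose_input, upscale_bbox = detector_to_alpha_pose(frame, class_IDs, scores, bounding_boxs)
    if upscale_bbox is not None:
        predicted_heatmap = estimator(pose_input.as_in_context(context))
        pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
        img = cv_plot_keypoints(frame, pred_coords, confidence, class_IDs,
                                bounding_boxs, scores, box_thresh=0.5, keypoint_thresh=0.1)
    else:
        img = frame
    img = cv2.cvtColor(img, cv2.COLOR_RGB2BGR)  # back to BGR before writing
    result.write(cv2.resize(img, size))         # match the size the writer was opened with

cap.release()
result.release()

Since every input frame is written at the source fps, the output duration should now match the input (frame_count / fps seconds).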

That helped; the output video matches better now, but it’s still not 100% correct. I sent you a direct message here, can you take a look and get back to me? Much appreciated.