Quantized pose estimation/object detection models

I’m using a script to run pose estimation on the video stream from a camera, and I wanted to try improving the inference speed. Currently, I’m using:

  • an SSD model, pre-trained on COCO (ssd_512_mobilenet1.0_coco), to detect persons in the frame;
  • simple_pose_resnet18_v1b for pose estimation, using the detections from the previous model.

And this is the code I’m using:

import logging

import cv2
import gluoncv as gcv
import mxnet as mx
import numpy as np
from gluoncv.data import mscoco
from gluoncv.data.transforms.pose import detector_to_simple_pose, heatmap_to_coord
from gluoncv.model_zoo import get_model
from gluoncv.utils.viz import cv_plot_image, cv_plot_keypoints
from mxnet.contrib.quantization import *

def main():
    """Run live person detection and pose estimation on the default camera.

    Frames are read from camera 0, passed through the SSD detector, and the
    detections are handed to the simple-pose network. The annotated frame is
    displayed in a window. The loop ends when 'q' is pressed or the camera
    stops delivering frames; the capture device and the window are released
    on every exit path, including exceptions.
    """
    ctx = mx.cpu()
    net_obj_det = get_model('ssd_512_mobilenet1.0_coco', pretrained=True, ctx=ctx)
    net_pose_est = get_model('simple_pose_resnet18_v1b', pretrained='ccd24037', ctx=ctx)
    net_obj_det.hybridize()
    net_pose_est.hybridize()

    cap = cv2.VideoCapture(0)
    try:
        while True:
            # Load frame from the camera
            ret, frame_np_orig = cap.read()

            key = cv2.waitKey(1)
            if key == ord('q') or not ret:
                break

            # Image pre-processing: OpenCV delivers BGR, the networks expect RGB
            frame_nd_orig = mx.nd.array(
                cv2.cvtColor(frame_np_orig, cv2.COLOR_BGR2RGB)).astype('uint8')
            frame_nd_new, frame_np_new = gcv.data.transforms.presets.ssd.transform_test(
                frame_nd_orig, short=512, max_size=700)
            frame_nd_new = frame_nd_new.as_in_context(ctx)

            # Run the frame through the object detector
            class_IDs, scores, bounding_boxes = net_obj_det(frame_nd_new)

            # Detection of body keypoints.
            # NOTE(review): detector_to_simple_pose is relied on to pick the
            # person detections out of the raw detector output -- confirm its
            # default score threshold suits this use case.
            pose_input, upscale_bbox = detector_to_simple_pose(
                frame_np_new, class_IDs, scores, bounding_boxes,
                output_shape=(256, 192), ctx=ctx)
            if len(upscale_bbox) > 0:
                predicted_heatmap = net_pose_est(pose_input)
                pred_coords, confidence = heatmap_to_coord(predicted_heatmap, upscale_bbox)
                img = cv_plot_keypoints(
                    frame_np_new, pred_coords, confidence, class_IDs,
                    bounding_boxes, scores, box_thresh=0.5, keypoint_thresh=0.2)
            else:
                # Nobody in view: show the plain frame instead of an
                # undefined (first iteration) or stale (later iterations) image.
                img = frame_np_new

            cv_plot_image(img)
    finally:
        # Single cleanup point, also reached when an exception escapes the loop.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == "__main__":
    main()

I wanted to try quantizing these models, and I was referring to this page. So, I tried something like this:

# Attempted quantization of the detector. Both calib_data and data_shapes are
# left as None here, so the call is given no input-shape information; this is
# the configuration that produces the traceback shown below.
net_obj_det = quantize_net(
	net_obj_det,
	quantized_dtype='auto',
	exclude_layers=None,
	exclude_layers_match=None,
	calib_data=None,			# no calibration data supplied
	data_shapes=None,			# documented as required when calib_data is not provided
	calib_mode='naive',
	num_calib_examples=None,
	ctx=mx.cpu(),
	logger=logging,
)

But it’s returning the error:

 File "/home/lews/anaconda3/envs/gluon/lib/python3.8/site-packages/mxnet/contrib/quantization.py", line 820, in quantize_net
    data_shapes = dshapes
UnboundLocalError: local variable 'dshapes' referenced before assignment

In the page I linked, it’s specified that ‘data_shapes’ should be a “List of DataDesc, required if calib_data is not provided”. How am I supposed to use it precisely?