diff --git a/tasks/hand_pose/README.md b/tasks/hand_pose/README.md
new file mode 100644
index 0000000..7f76c9c
--- /dev/null
+++ b/tasks/hand_pose/README.md
@@ -0,0 +1,68 @@
+# Hand Pose Estimation and Classification
+
+This project is an extension of TRT Pose for hand pose detection. The project includes:
+
+- Pretrained models for hand pose estimation capable of running in real time on Jetson Xavier NX.
+
+- Scripts for applications of hand pose estimation:
+
+  - Hand gesture recognition (hand pose classification)
+
+  - Cursor control
+
+  - A Mini-Paint-style application
+
+- A pretrained model for gesture recognition
+
+## Getting Started
+
+### Step 1 - Install trt_pose and its dependencies
+
+Make sure to follow all the instructions from trt_pose and install all of its dependencies.
+Follow the instructions at https://github.com/NVIDIA-AI-IOT/trt_pose.
+
+### Step 2 - Install dependencies for hand pose
+
+    pip install traitlets
+
+### Step 3 - Run hand pose and its applications
+
+A) Hand pose demo
+
+  - Open and follow the live_hand_pose.ipynb notebook.
+
+B) Hand gesture recognition (hand pose classification)
+  - Install dependencies
+    - scikit-learn
+      - pip install -U scikit-learn
+      - or install it from source
+
+  The current gesture classification model supports six classes (fist, pan, stop, fine, peace, no hand).
+  More gestures can be added by creating your own dataset and training an SVM model on it.
+  An SVM model weight is provided for inference.
+
+  To build your own hand gesture classifier on top of the hand pose estimation, follow these steps:
+
+  - Create your own dataset using gesture_data_collection.ipynb or gesture_data_collection_with_pose.ipynb.
+    This lets you define the gestures you want to classify (e.g. thumbs up, fist, etc.).
+    The notebook automatically creates a dataset with images and labels that is ready to be used for training the gesture classifier.
+
+  - Train using the train_gesture_classification.ipynb notebook. It uses an SVM from scikit-learn.
+    Other types of models can also be experimented with.
+
+C) Cursor control application
+
+  - Install dependencies
+    - pyautogui
+      - python3 -m pip install pyautogui
+      - On Jetson, install it from source
+
+  - Open and follow the cursor_control_live_demo.ipynb notebook.
+  - This lets you control the mouse cursor on your desktop using the hand gesture classification.
+    While your hand gesture is pan, you control the cursor; when it is stop, a left click is performed.
+
+D) Mini-Paint
+
+  - Open and follow the draw_hand_pose.ipynb notebook.
+
+The model was trained using the training script in trt_pose and hand pose data collected at NVIDIA.
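+
+All of the notebooks in this task load that pretrained model the same way before running inference. A condensed version of the loading code (taken from the notebooks; on the first run it also builds a TensorRT engine with torch2trt for real-time inference on Jetson) is sketched below:
+
+    import json
+    import os
+    import torch
+    import trt_pose.coco
+    import trt_pose.models
+
+    # Keypoint/skeleton definition shipped with this task.
+    with open('hand_pose.json', 'r') as f:
+        hand_pose = json.load(f)
+
+    topology = trt_pose.coco.coco_category_to_topology(hand_pose)
+    num_parts = len(hand_pose['keypoints'])
+    num_links = len(hand_pose['skeleton'])
+
+    # resnet18 backbone with attention, 256x256 input, as used in the notebooks.
+    model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()
+    model.load_state_dict(torch.load('resnet18_244x224_epoch_4150.pth'))
+
+    # One-time TensorRT optimization; later runs load the saved engine directly.
+    if not os.path.exists('resnet18_244x224_epoch_4150_trt.pth'):
+        import torch2trt
+        data = torch.zeros((1, 3, 256, 256)).cuda()
+        model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1 << 25)
+        torch.save(model_trt.state_dict(), 'resnet18_244x224_epoch_4150_trt.pth')
+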
+ +Model details: resnet18 diff --git a/tasks/hand_pose/cursor_control_live_demo.ipynb b/tasks/hand_pose/cursor_control_live_demo.ipynb new file mode 100644 index 0000000..bcd3d41 --- /dev/null +++ b/tasks/hand_pose/cursor_control_live_demo.ipynb @@ -0,0 +1,322 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Matplotlib created a temporary config/cache directory at /tmp/matplotlib-kjp96j9b because the default path (/home/mikyas/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.\n" + ] + } + ], + "source": [ + "import json\n", + "import cv2\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.image as mpimg \n", + "import trt_pose.coco\n", + "import math\n", + "import os\n", + "import numpy as np\n", + "import traitlets\n", + "import pickle \n", + "import pyautogui\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('hand_pose.json', 'r') as f:\n", + " hand_pose = json.load(f)\n", + "\n", + "topology = trt_pose.coco.coco_category_to_topology(hand_pose)\n", + "import trt_pose.models\n", + "\n", + "num_parts = len(hand_pose['keypoints'])\n", + "num_links = len(hand_pose['skeleton'])\n", + "\n", + "model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()\n", + "import torch\n", + "\n", + "\n", + "WIDTH = 256\n", + "HEIGHT = 256\n", + "data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()\n", + "\n", + "if not os.path.exists('resnet18_244x224_epoch_4150_trt.pth'):\n", + " MODEL_WEIGHTS = 'resnet18_244x224_epoch_4150.pth'\n", + " model.load_state_dict(torch.load(MODEL_WEIGHTS))\n", + " import torch2trt\n", + " model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)\n", + " OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + " torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)\n", + "\n", + "\n", + "OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from trt_pose.draw_objects import DrawObjects\n", + "from trt_pose.parse_objects import ParseObjects\n", + "\n", + "parse_objects = ParseObjects(topology,cmap_threshold=0.15, link_threshold=0.15)\n", + "draw_objects = DrawObjects(topology)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import torchvision.transforms as transforms\n", + "import PIL.Image\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda()\n", + "device = torch.device('cuda')\n", + "\n", + "def preprocess(image):\n", + " global device\n", + " device = torch.device('cuda')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device)\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return 
image[None, ...]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.svm import SVC\n", + "clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from preprocessdata import preprocessdata\n", + "preprocessdata = preprocessdata(topology, num_parts)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "svm_train = False\n", + "if svm_train:\n", + " clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, labels_train, hand.labels_test)\n", + " filename = 'svmmodel.sav'\n", + " pickle.dump(clf, open(filename, 'wb'))\n", + "else:\n", + " filename = 'svmmodel_new.sav'\n", + " clf = pickle.load(open(filename, 'rb'))" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.usb_camera import USBCamera\n", + "from jetcam.csi_camera import CSICamera\n", + "from jetcam.utils import bgr8_to_jpeg\n", + "\n", + "camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n", + "#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "f735c3a4f55842d3bde40004c969478b", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Image(value=b'', format='jpeg', height='256', width='256')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import ipywidgets\n", + "from IPython.display import display\n", + "\n", + "\n", + "image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "screenWidth, screenHeight = pyautogui.size()\n", + "p_text = 'none'\n", + "pyautogui.FAILSAFE = False\n", + "def control_cursor(text, joints):\n", + " global p_text\n", + " if p_text!=\"stop\" and text==\"stop\":\n", + " pyautogui.click()\n", + " if text == \"pan\":\n", + " pyautogui.moveTo(((joints[8][0])*1000)/256, ((joints[8][1])*700)/256)\n", + " #pyautogui.moveTo((joints[8][0]), (joints[8][1]))\n", + " p_text = text" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " data = preprocess(image)\n", + " cmap, paf = model_trt(data)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)#, cmap_threshold=0.15, link_threshold=0.15)\n", + " draw_objects(image, counts, objects, peaks)\n", + " joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n", + " dist_bn_joints = preprocessdata.find_distance(joints)\n", + " gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])\n", + " gesture_joints = gesture[0]\n", + " preprocessdata.prev_queue.append(gesture_joints)\n", + " preprocessdata.prev_queue.pop(0)\n", + " preprocessdata.print_label(image, preprocessdata.prev_queue)\n", + " control_cursor(preprocessdata.text, joints)\n", + " image_w.value = bgr8_to_jpeg(image)\n" + ] 
+ }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.unobserve_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.running = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tasks/hand_pose/dataloader.py b/tasks/hand_pose/dataloader.py new file mode 100644 index 0000000..5e17f5c --- /dev/null +++ b/tasks/hand_pose/dataloader.py @@ -0,0 +1,81 @@ +import os +import json +import cv2 +import numpy as np + +class dataloader: + + + def __init__(self, path, label_file, test_label): + self.train_path = path+"training/" + self.test_path = path+"testing/" + self.label_path = path+label_file + self.test_label_path = path+test_label + self.train_data = [] + self.train_images =[] + self.train_file_name = [] + self.test_data = [] + self.test_images = [] + self.test_file_name = [] + self.labels_train = [] + self.labels_test = [] + self.load_hand_dataset(self.train_path, self.test_path) + self._assert_exist(self.label_path) + self._assert_exist(self.test_label_path) + self.load_labels(self.label_path, self.test_label_path) + + + + def _assert_exist(self, label_path): + msg = 'File is not availble: %s' % label_path + assert os.path.exists(label_path), msg + def load_labels(self, label_path, test_label): + self._assert_exist(label_path) + self._assert_exist(test_label) + with open(label_path, 'r') as f: + label_data = json.load(f) + self.labels_train = label_data["labels"] + with open(test_label, 'r') as f: + test_label = json.load(f) + self.labels_test = test_label["labels"] + + #return labels_train, labels_test + def scaled_data(self, train_data, test_data): + raw_scaler = preprocessing.StandardScaler().fit(train_data) + scaled_train_data = raw_scaler.transform(train_data) + scaled_test_data = raw_scaler.transform(test_data) + return scaled_train_data, scaled_test_data, raw_scaler + def load_hand_dataset(self, train_path, test_path): + WIDTH = 256 + HEIGHT = 256 + for filename in sorted(os.listdir(train_path)): + self.train_file_name.append(filename) + image = cv2.imread(train_path+filename) + #image = image[:, ::-1, :] + #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = cv2.resize(image, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA) + self.train_data.append(np.reshape(np.array(image), 196608)) + self.train_images.append(image) + + for filename in sorted(os.listdir(test_path)): + self.test_file_name.append(filename) + image = 
cv2.imread(test_path+filename) + #image = image[:, ::-1, :] + #image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB) + image = cv2.resize(image, (WIDTH, HEIGHT), interpolation = cv2.INTER_AREA) + self.test_images.append(image) + self.test_data.append(np.reshape(np.array(image), 196608)) + #return train_images, test_images, train_data, test_data + def smaller_dataset(self, dataset, no_samples_per_class, no_of_classes): + total_samples_per_class =100 + start = 0 + end = no_samples_per_class + new_dataset = [] + labels = [] + for i in range(no_of_classes): + new_data = dataset[start:end] + start = start+total_samples_per_class + end = start+no_samples_per_class + new_dataset.extend(new_data) + labels.extend([i+1]*no_samples_per_class) + return new_dataset, labels diff --git a/tasks/hand_pose/draw_hand_pose.ipynb b/tasks/hand_pose/draw_hand_pose.ipynb new file mode 100644 index 0000000..03b0d35 --- /dev/null +++ b/tasks/hand_pose/draw_hand_pose.ipynb @@ -0,0 +1,278 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import json\n", + "import cv2\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.image as mpimg \n", + "import trt_pose.coco\n", + "import math\n", + "import os\n", + "import numpy as np\n", + "import traitlets\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "with open('hand_pose.json', 'r') as f:\n", + " hand_pose = json.load(f)\n", + "\n", + "topology = trt_pose.coco.coco_category_to_topology(hand_pose)\n", + "import trt_pose.models\n", + "\n", + "num_parts = len(hand_pose['keypoints'])\n", + "num_links = len(hand_pose['skeleton'])\n", + "\n", + "model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()\n", + "import torch\n", + "\n", + "\n", + "WIDTH = 256\n", + "HEIGHT = 256\n", + "data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()\n", + "\n", + "if not os.path.exists('resnet18_244x224_epoch_4150_trt.pth'):\n", + " MODEL_WEIGHTS = 'resnet18_244x224_epoch_4150.pth'\n", + " model.load_state_dict(torch.load(MODEL_WEIGHTS))\n", + " import torch2trt\n", + " model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)\n", + " OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + " torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)\n", + "\n", + "\n", + "OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from trt_pose.draw_objects import DrawObjects\n", + "from trt_pose.parse_objects import ParseObjects\n", + "\n", + "parse_objects = ParseObjects(topology,cmap_threshold=0.15, link_threshold=0.15)\n", + "draw_objects = DrawObjects(topology)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import torchvision.transforms as transforms\n", + "import PIL.Image\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda()\n", + "device = torch.device('cuda')\n", + "\n", + "def preprocess(image):\n", + " global device\n", + " device = torch.device('cuda')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " image = PIL.Image.fromarray(image)\n", + " image = 
transforms.functional.to_tensor(image).to(device)\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, let's define a function that will preprocess the image, which is originally in BGR8 / HWC format." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.usb_camera import USBCamera\n", + "from jetcam.csi_camera import CSICamera\n", + "from jetcam.utils import bgr8_to_jpeg\n", + "\n", + "camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n", + "#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from preprocessdata import preprocessdata\n", + "preprocessdata = preprocessdata(topology, num_parts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets\n", + "from IPython.display import display\n", + "\n", + "\n", + "image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "pen = []\n", + "def draw(image, joints):\n", + " cv2.circle(image, (joints[17][0], joints[17][1]), 1,(255,0,255), 2)\n", + " cv2.circle(image, (joints[9][0], joints[9][1]), 1,(0,255,0), 2)\n", + " cv2.circle(image, (joints[5][0], joints[5][1]), 1,(255,255,255), 2)\n", + " cv2.circle(image, (joints[1][0], joints[1][1]), 1,(0,0,0), 2)\n", + " dist_between_j17_j1 = math.sqrt((joints[17][0]-joints[1][0])**2+(joints[17][1]-joints[1][1])**2)\n", + " dist_between_j9_j1 = math.sqrt((joints[9][0]-joints[1][0])**2+(joints[9][1]-joints[1][1])**2)\n", + " global pen\n", + " if dist_between_j9_j1<30:\n", + " pen.append((joints[5][0], joints[5][1]))\n", + " for i in range(len(pen)):\n", + " if i > 0:\n", + " cv2.line(image,pen[i-1], pen[i], (0,0,0), 2)\n", + " #cv2.circle(image, pen[i], 1,(0,0,0), 2)\n", + " if dist_between_j17_j1<5:\n", + " pen.clear()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def draw_pose(image, joints):\n", + " for i in range (len(joints)):\n", + " cv2.circle(image, (joints[i][0], joints[i][1]), 1,(0,0,255), 2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " data = preprocess(image)\n", + " cmap, paf = model_trt(data)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)#, cmap_threshold=0.15, link_threshold=0.15)\n", + " draw_objects(image, counts, objects, peaks) \n", + " joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n", + " dist_bn_joints = preprocessdata.find_distance(joints)\n", + " #draw(image, joints)\n", + " #draw_pose(image, joints)\n", + " image_w.value = bgr8_to_jpeg(image[:, ::-1, :])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": 
"code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.unobserve_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.running = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tasks/hand_pose/gesture_classification_live_demo.ipynb b/tasks/hand_pose/gesture_classification_live_demo.ipynb new file mode 100644 index 0000000..7c9d158 --- /dev/null +++ b/tasks/hand_pose/gesture_classification_live_demo.ipynb @@ -0,0 +1,323 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Matplotlib created a temporary config/cache directory at /tmp/matplotlib-ik_m4vpa because the default path (/home/mikyas/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.\n" + ] + } + ], + "source": [ + "import json\n", + "import cv2\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.image as mpimg \n", + "import trt_pose.coco\n", + "import math\n", + "import os\n", + "import numpy as np\n", + "import traitlets\n", + "import pickle \n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('hand_pose.json', 'r') as f:\n", + " hand_pose = json.load(f)\n", + "\n", + "topology = trt_pose.coco.coco_category_to_topology(hand_pose)\n", + "import trt_pose.models\n", + "\n", + "num_parts = len(hand_pose['keypoints'])\n", + "num_links = len(hand_pose['skeleton'])\n", + "\n", + "model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()\n", + "import torch\n", + "\n", + "\n", + "WIDTH = 256\n", + "HEIGHT = 256\n", + "data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()\n", + "\n", + "if not os.path.exists('26k_face_epoch_3500_trt.pth'):\n", + " MODEL_WEIGHTS = '26k_face_epoch_3500.pth'\n", + " model.load_state_dict(torch.load(MODEL_WEIGHTS))\n", + " import torch2trt\n", + " model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)\n", + " OPTIMIZED_MODEL = '26k_face_epoch_3500_trt.pth'\n", + " torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)\n", + "\n", + "\n", + "OPTIMIZED_MODEL = '26k_face_epoch_3500_trt.pth'\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + 
"source": [ + "from trt_pose.draw_objects import DrawObjects\n", + "from trt_pose.parse_objects import ParseObjects\n", + "\n", + "parse_objects = ParseObjects(topology,cmap_threshold=0.12, link_threshold=0.15)\n", + "draw_objects = DrawObjects(topology)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import torchvision.transforms as transforms\n", + "import PIL.Image\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda()\n", + "device = torch.device('cuda')\n", + "\n", + "def preprocess(image):\n", + " global device\n", + " device = torch.device('cuda')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device)\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.svm import SVC\n", + "clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from preprocessdata import preprocessdata\n", + "preprocessdata = preprocessdata(topology, num_parts)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "svm_train = False\n", + "if svm_train:\n", + " clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, hand.labels_train, hand.labels_test)\n", + " filename = 'svmmodel.sav'\n", + " pickle.dump(clf, open(filename, 'wb'))\n", + "else:\n", + " filename = 'svmmodel.sav'\n", + " clf = pickle.load(open(filename, 'rb'))" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.usb_camera import USBCamera\n", + "from jetcam.csi_camera import CSICamera\n", + "from jetcam.utils import bgr8_to_jpeg\n", + "\n", + "camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n", + "#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [], + "source": [ + "def draw_joints(image, joints):\n", + " count = 0\n", + " for i in joints:\n", + " if i==[0,0]:\n", + " count+=1\n", + " if count>= 3:\n", + " return \n", + " for i in joints:\n", + " cv2.circle(image, (i[0],i[1]), 2, (0,0,255), 1)\n", + " cv2.circle(image, (joints[0][0],joints[0][1]), 2, (255,0,255), 1)\n", + " for i in hand_pose['skeleton']:\n", + " if joints[i[0]-1][0]==0 or joints[i[1]-1][0] == 0:\n", + " break\n", + " cv2.line(image, (joints[i[0]-1][0],joints[i[0]-1][1]), (joints[i[1]-1][0],joints[i[1]-1][1]), (0,255,0), 1)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "c09b79578b5249a48aa40018667da6b6", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Image(value=b'', format='jpeg', height='256', width='256')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import ipywidgets\n", + "from IPython.display import display\n", + "\n", + "\n", + 
"image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " data = preprocess(image)\n", + " cmap, paf = model_trt(data)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)\n", + " joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n", + " draw_joints(image, joints)\n", + " dist_bn_joints = preprocessdata.find_distance(joints)\n", + " gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])\n", + " gesture_joints = gesture[0]\n", + " preprocessdata.prev_queue.append(gesture_joints)\n", + " preprocessdata.prev_queue.pop(0)\n", + " preprocessdata.print_label(image, preprocessdata.prev_queue)\n", + " image_w.value = bgr8_to_jpeg(image)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.unobserve_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.running = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tasks/hand_pose/gesture_classifier.py b/tasks/hand_pose/gesture_classifier.py new file mode 100644 index 0000000..8f2ad48 --- /dev/null +++ b/tasks/hand_pose/gesture_classifier.py @@ -0,0 +1,21 @@ +from sklearn.pipeline import make_pipeline +from sklearn.preprocessing import StandardScaler + +class gesture_classifier: + + def __init__(self): + pass + + def svm_accuracy(self, test_predicted, labels_test): + predicted = [] + for i in range(len(labels_test)): + if labels_test[i]==test_predicted[i]: + predicted.append(0) + else: + predicted.append(1) + accuracy = 1 - sum(predicted)/len(labels_test) + return accuracy + def trainsvm(self, clf, train_data, test_data, labels_train, labels_test): + clf.fit(train_data,labels_train) + predicted_test = clf.predict(test_data) + return clf, predicted_test \ No newline at end of file diff --git a/tasks/hand_pose/gesture_data_collection.ipynb b/tasks/hand_pose/gesture_data_collection.ipynb new file mode 100644 index 0000000..0658057 --- /dev/null +++ b/tasks/hand_pose/gesture_data_collection.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": 
{}, + "source": [ + "This notebook creates a dataset (images and labels as a json file). The dataset created can be used for pose classification. \n", + "In order to create a new dataset for gesture recoginition specify the following parameters \n", + "\n", + "**no_of_classes** - Number of classes to be created. i.e. For hand pose the number of hand gestures to be created.\n", + "\n", + "**path_dir** - Path to the directory to be created\n", + "\n", + "**dataset_name** - The name of the dataset to be created\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "def create_directories_for_classes(no_of_classes, path_dir, dataset_name):\n", + " dir_ = os.path.join(path_dir, dataset_name)\n", + " for i in range(no_of_classes):\n", + " dir_to_create = os.path.join(dir_,\"%s\" % (i+1))\n", + " try:\n", + " os.makedirs(dir_to_create)\n", + " except FileExistsError:\n", + " print(os.path.join(\"The following directory was not created because it already exsists\", dir_ , ))\n" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [], + "source": [ + "dir_datasets = '/home/mikyas/mike_dataset/jj/'\n", + "dataset_name = \"hand_dataset\"\n", + "no_of_classes = 5\n", + "create_directories_for_classes(no_of_classes, dir_datasets, dataset_name )" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "False" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import ipywidgets.widgets as widgets\n", + "dir_ = os.path.join(dir_datasets, dataset_name)\n", + "curr_class_no = 1\n", + "button_layout = widgets.Layout(width='128px', height='32px')\n", + "curr_dir = os.path.join(dir_,'%s'%curr_class_no )\n", + "collecting_button = widgets.Button(description= 'Collect Class ' + str(curr_class_no), button_style='success', layout=button_layout)\n", + "prev_button = widgets.Button(description='Previous Class', button_style='primary', layout=button_layout)\n", + "nxt_button = widgets.Button(description='Next Class', button_style='info', layout=button_layout)\n", + "\n", + "dir_count = widgets.IntText(layout=button_layout, value=len(os.listdir(curr_dir)))\n", + "dir_count.continuous_update" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [], + "source": [ + "from uuid import uuid1\n", + "def save_snapshot(directory):\n", + " image_path = os.path.join(directory, str(uuid1()) + '.jpg')\n", + " with open(image_path, 'wb') as f:\n", + " f.write(image_w.value)\n", + "def save_dir():\n", + " global curr_dir, dir_count\n", + " save_snapshot(curr_dir)\n", + " dir_count.value = len(os.listdir(curr_dir))\n", + "def prev_dir():\n", + " global curr_class_no, curr_dir, no_of_classes\n", + " if curr_class_no>1:\n", + " curr_class_no-=1\n", + " curr_dir = os.path.join(dir_,'%s'%curr_class_no )\n", + " collecting_button.description = 'Collect Class ' + str(curr_class_no)\n", + " dir_count.value = len(os.listdir(curr_dir))\n", + " dir_count.continuous_update\n", + "def nxt_dir():\n", + " global curr_class_no, curr_dir, no_of_classes\n", + " if curr_class_no1:\n", + " curr_class_no-=1\n", + " curr_dir = os.path.join(dir_,'%s'%curr_class_no )\n", + " collecting_button.description = 'Collect Class ' + str(curr_class_no)\n", + " dir_count.value = len(os.listdir(curr_dir))\n", + " dir_count.continuous_update\n", + "def nxt_dir():\n", + " global curr_class_no, 
curr_dir, no_of_classes\n", + " if curr_class_no= 3:\n", + " return \n", + " for i in joints:\n", + " cv2.circle(image, (i[0],i[1]), 2, (0,0,255), 1)\n", + " cv2.circle(image, (joints[0][0],joints[0][1]), 2, (255,0,255), 1)\n", + " for i in hand_pose['skeleton']:\n", + " if joints[i[0]-1][0]==0 or joints[i[1]-1][0] == 0:\n", + " break\n", + " cv2.line(image, (joints[i[0]-1][0],joints[i[0]-1][1]), (joints[i[1]-1][0],joints[i[1]-1][1]), (0,255,0), 1)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.usb_camera import USBCamera\n", + "from jetcam.csi_camera import CSICamera\n", + "from jetcam.utils import bgr8_to_jpeg\n", + "\n", + "camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n", + "#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets\n", + "from IPython.display import display\n", + "\n", + "\n", + "image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def execute(change):\n", + " image = change['new']\n", + " data = preprocess(image)\n", + " cmap, paf = model_trt(data)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)\n", + " joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n", + " draw_joints(image, joints)\n", + " image_w.value = bgr8_to_jpeg(image[:, ::-1, :])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.unobserve_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.running = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tasks/hand_pose/preprocessdata.py b/tasks/hand_pose/preprocessdata.py new file mode 100644 index 0000000..ead0ae1 --- /dev/null +++ b/tasks/hand_pose/preprocessdata.py @@ -0,0 +1,93 @@ +import math +import pickle +import cv2 + + +class preprocessdata: + + def __init__(self, topology, num_parts): + self.joints = [] + self.dist_bn_joints = [] + self.topology = topology + self.num_parts = num_parts + self.text = "no hand" + self.num_frames = 7 + self.prev_queue = [7]*self.num_frames 
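+        # prev_queue holds the last num_frames gesture predictions (class 7 = "no hand").
+        # print_label() only switches the displayed text when all entries agree,
+        # which debounces the per-frame SVM output.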
+ + def svm_accuracy(self, test_predicted, labels_test): + predicted = [] + for i in range(len(labels_test)): + if labels_test[i]==test_predicted[i]: + predicted.append(0) + else: + predicted.append(1) + accuracy = 1 - sum(predicted)/len(labels_test) + return accuracy + def trainsvm(self, clf, train_data, test_data, labels_train, labels_test): + clf.fit(train_data,labels_train) + predicted_test = clf.predict(test_data) + return clf, predicted_test + #def loadsvmweights(): + + def joints_inference(self, image, counts, objects, peaks): + joints_t = [] + height = image.shape[0] + width = image.shape[1] + K = self.topology.shape[0] + count = int(counts[0]) + for i in range(count): + obj = objects[0][i] + C = obj.shape[0] + for j in range(C): + k = int(obj[j]) + picked_peaks = peaks[0][j][k] + joints_t.append([round(float(picked_peaks[1]) * width), round(float(picked_peaks[0]) * height)]) + joints_pt = joints_t[:self.num_parts] + rest_of_joints_t = joints_t[self.num_parts:] + """ + #when it does not predict a particular joint in the same association it will try to find it in a different association + for i in range(len(rest_of_joints_t)): + l = i%self.num_parts + if joints_pt[l] == [0,0]: + joints_pt[l] = rest_of_joints_t[i] + #if nothing is predicted + """ + if count == 0: + joints_pt = [[0,0]]*self.num_parts + return joints_pt + def find_distance(self, joints): + joints_features = [] + for i in joints: + for j in joints: + dist_between_i_j = math.sqrt((i[0]-j[0])**2+(i[1]-j[1])**2) + joints_features.append(dist_between_i_j) + return joints_features + def print_label(self, image, gesture_joints): + font = cv2.FONT_HERSHEY_SIMPLEX + color = (255, 0, 0) + org = (50, 50) + thickness = 2 + fontScale = 0.5 + if self.prev_queue == [1]*7: + self.text = 'fist' + elif self.prev_queue == [2]*7: + self.text = 'pan' + elif self.prev_queue == [3]*7: + self.text = 'stop' + elif self.prev_queue == [4]*7: + self.text = 'peace' + elif self.prev_queue == [5]*7: + self.text = 'fine' + elif self.prev_queue == [6]*7: + self.text = 'no hand' + elif self.prev_queue == [7]*7: + self.text = 'no hand' + image = cv2.putText(image, self.text, org, font, + fontScale, color, thickness, cv2.LINE_AA) + return image + + + + + + \ No newline at end of file diff --git a/tasks/hand_pose/svmmodel.sav b/tasks/hand_pose/svmmodel.sav new file mode 100644 index 0000000..f57f689 Binary files /dev/null and b/tasks/hand_pose/svmmodel.sav differ diff --git a/tasks/hand_pose/train_gesture_classification.ipynb b/tasks/hand_pose/train_gesture_classification.ipynb new file mode 100644 index 0000000..e91ac79 --- /dev/null +++ b/tasks/hand_pose/train_gesture_classification.ipynb @@ -0,0 +1,401 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Matplotlib created a temporary config/cache directory at /tmp/matplotlib-mn8hww0h because the default path (/home/mikyas/.cache/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.\n" + ] + } + ], + "source": [ + "import json\n", + "import cv2\n", + "import matplotlib.pyplot as plt\n", + "import matplotlib.image as mpimg \n", + "import trt_pose.coco\n", + "import math\n", + "import os\n", + "import numpy as np\n", + "import traitlets\n", + "import pickle \n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + 
"metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "with open('hand_pose.json', 'r') as f:\n", + " hand_pose = json.load(f)\n", + "\n", + "topology = trt_pose.coco.coco_category_to_topology(hand_pose)\n", + "import trt_pose.models\n", + "\n", + "num_parts = len(hand_pose['keypoints'])\n", + "num_links = len(hand_pose['skeleton'])\n", + "\n", + "model = trt_pose.models.resnet18_baseline_att(num_parts, 2 * num_links).cuda().eval()\n", + "import torch\n", + "\n", + "\n", + "WIDTH = 256\n", + "HEIGHT = 256\n", + "data = torch.zeros((1, 3, HEIGHT, WIDTH)).cuda()\n", + "\n", + "if not os.path.exists('resnet18_244x224_epoch_4150_trt.pth'):\n", + " MODEL_WEIGHTS = 'resnet18_244x224_epoch_4150.pth'\n", + " model.load_state_dict(torch.load(MODEL_WEIGHTS))\n", + " import torch2trt\n", + " model_trt = torch2trt.torch2trt(model, [data], fp16_mode=True, max_workspace_size=1<<25)\n", + " OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + " torch.save(model_trt.state_dict(), OPTIMIZED_MODEL)\n", + "\n", + "\n", + "OPTIMIZED_MODEL = 'resnet18_244x224_epoch_4150_trt.pth'\n", + "from torch2trt import TRTModule\n", + "\n", + "model_trt = TRTModule()\n", + "model_trt.load_state_dict(torch.load(OPTIMIZED_MODEL))" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "from trt_pose.draw_objects import DrawObjects\n", + "from trt_pose.parse_objects import ParseObjects\n", + "\n", + "parse_objects = ParseObjects(topology,cmap_threshold=0.12, link_threshold=0.15)\n", + "draw_objects = DrawObjects(topology)" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "\n", + "import torchvision.transforms as transforms\n", + "import PIL.Image\n", + "\n", + "mean = torch.Tensor([0.485, 0.456, 0.406]).cuda()\n", + "std = torch.Tensor([0.229, 0.224, 0.225]).cuda()\n", + "device = torch.device('cuda')\n", + "\n", + "def preprocess(image):\n", + " global device\n", + " device = torch.device('cuda')\n", + " image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)\n", + " image = PIL.Image.fromarray(image)\n", + " image = transforms.functional.to_tensor(image).to(device)\n", + " image.sub_(mean[:, None, None]).div_(std[:, None, None])\n", + " return image[None, ...]" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "from sklearn.pipeline import make_pipeline\n", + "from sklearn.preprocessing import StandardScaler\n", + "from sklearn.svm import SVC\n", + "clf = make_pipeline(StandardScaler(), SVC(gamma='auto', kernel='rbf'))" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [], + "source": [ + "from preprocessdata import preprocessdata\n", + "preprocessdata = preprocessdata(topology, num_parts)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from dataloader import dataloader\n", + "path = \"/home/mikyas/mike_dataset/gestures/hand_dataset/\"\n", + "label_file = \"hand_dataset.json\"\n", + "test_label = \"hand_dataset_test.json\"\n", + "hand = dataloader(path, label_file, test_label)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def data_preprocess(images):\n", + " dist_bn_joints_all_data = []\n", + " for im in images:\n", + " im = im[:, ::-1, :]\n", + " 
data_im = preprocess(im)\n", + " cmap, paf = model_trt(data_im)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)\n", + " joints = preprocessdata.joints_inference(im, counts, objects, peaks)\n", + " dist_bn_joints = preprocessdata.find_distance(joints)\n", + " dist_bn_joints_all_data.append(dist_bn_joints)\n", + " return dist_bn_joints_all_data" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def smaller_dataset(dataset, no_samples_per_class, no_of_classes):\n", + " total_samples_per_class =100\n", + " start = 0\n", + " end = no_samples_per_class\n", + " new_dataset = []\n", + " labels = []\n", + " for i in range(no_of_classes):\n", + " new_data = dataset[start:end]\n", + " start = start+total_samples_per_class\n", + " end = start+no_samples_per_class\n", + " new_dataset.extend(new_data)\n", + " labels.extend([i+1]*no_samples_per_class)\n", + " return new_dataset, labels" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "train_images, labels_train = hand.smaller_dataset(hand.train_images,100,6)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "joints_train = data_preprocess(hand.train_images)\n", + "joints_test = data_preprocess(hand.test_images)" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [], + "source": [ + "svm_train = False\n", + "if svm_train:\n", + " clf, predicted = preprocessdata.trainsvm(clf, joints_train, joints_test, hand.labels_train, hand.labels_test)\n", + " filename = 'svmmodel_new.sav'\n", + " pickle.dump(clf, open(filename, 'wb'))\n", + "else:\n", + " filename = 'svmmodel.sav'\n", + " clf = pickle.load(open(filename, 'rb'))" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "preprocessdata.svm_accuracy(clf.predict(joints_test), hand.labels_test)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clf.predict([joints_test[40],[0]*num_parts*num_parts])" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "clf.predict(joints_test)" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "from jetcam.usb_camera import USBCamera\n", + "from jetcam.csi_camera import CSICamera\n", + "from jetcam.utils import bgr8_to_jpeg\n", + "\n", + "camera = USBCamera(width=WIDTH, height=HEIGHT, capture_fps=30, capture_device=1)\n", + "#camera = CSICamera(width=WIDTH, height=HEIGHT, capture_fps=30)\n", + "\n", + "camera.running = True" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "0eb636d637824f2596b9f26ee5c970c1", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Image(value=b'', format='jpeg', height='256', width='256')" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], + "source": [ + "import ipywidgets\n", + "from IPython.display import display\n", + "\n", + "\n", + "image_w = ipywidgets.Image(format='jpeg', width=256, height=256)\n", + "display(image_w)" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "metadata": {}, + "outputs": [], + "source": [ + "def 
execute(change):\n", + " image = change['new']\n", + " data = preprocess(image)\n", + " cmap, paf = model_trt(data)\n", + " cmap, paf = cmap.detach().cpu(), paf.detach().cpu()\n", + " counts, objects, peaks = parse_objects(cmap, paf)#, cmap_threshold=0.15, link_threshold=0.15)\n", + " draw_objects(image, counts, objects, peaks)\n", + " joints = preprocessdata.joints_inference(image, counts, objects, peaks)\n", + " dist_bn_joints = preprocessdata.find_distance(joints)\n", + " gesture = clf.predict([dist_bn_joints,[0]*num_parts*num_parts])\n", + " gesture_joints = gesture[0]\n", + " preprocessdata.prev_queue.append(gesture_joints)\n", + " preprocessdata.prev_queue.pop(0)\n", + " preprocessdata.print_label(image, preprocessdata.prev_queue)\n", + " image_w.value = bgr8_to_jpeg(image)\n" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [], + "source": [ + "execute({'new': camera.value})" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [], + "source": [ + "camera.observe(execute, names='value')" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "camera.unobserve_all()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "#camera.running = False" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.9" + } + }, + "nbformat": 4, + "nbformat_minor": 4 +} diff --git a/tasks/pose_classifiaction_utils/gesture_data_collection.ipynb b/tasks/pose_classifiaction_utils/gesture_data_collection.ipynb new file mode 100644 index 0000000..06d09ed --- /dev/null +++ b/tasks/pose_classifiaction_utils/gesture_data_collection.ipynb @@ -0,0 +1,307 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import json" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "This notebook creates a dataset (images and labels as a json file). The dataset created can be used for pose classification. \n", + "In order to create a new dataset for gesture recoginition specify the following parameters \n", + "\n", + "**no_of_classes** - Number of classes to be created. i.e. 
For hand pose the number of hand gestures to be created.\n", + "\n", + "**path_dir** - Path to the directory to be created\n", + "\n", + "**dataset_name** - The name of the dataset to be created\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def create_directories_for_classes(no_of_classes, path_dir, dataset_name):\n", + " dir_ = os.path.join(path_dir, dataset_name)\n", + " for i in range(no_of_classes):\n", + " dir_to_create = os.path.join(dir_,\"%s\" % (i+1))\n", + " try:\n", + " os.makedirs(dir_to_create)\n", + " except FileExistsError:\n", + " print(os.path.join(\"The following directory was not created because it already exsists\", dir_ , ))\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "dir_datasets = '/home/mikyas/mike_dataset/bbbb/'\n", + "dataset_name = \"hand_dataset\"\n", + "no_of_classes = 3\n", + "create_directories_for_classes(no_of_classes, dir_datasets, dataset_name )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import ipywidgets.widgets as widgets\n", + "dir_ = os.path.join(dir_datasets, dataset_name)\n", + "curr_class_no = 1\n", + "button_layout = widgets.Layout(width='128px', height='32px')\n", + "curr_dir = os.path.join(dir_,'%s'%curr_class_no )\n", + "collecting_button = widgets.Button(description= 'Collect Class ' + str(curr_class_no), button_style='success', layout=button_layout)\n", + "prev_button = widgets.Button(description='Previous Class', button_style='primary', layout=button_layout)\n", + "nxt_button = widgets.Button(description='Next Class', button_style='info', layout=button_layout)\n", + "\n", + "dir_count = widgets.IntText(layout=button_layout, value=len(os.listdir(curr_dir)))\n", + "dir_count.continuous_update" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from uuid import uuid1\n", + "def save_snapshot(directory):\n", + " image_path = os.path.join(directory, str(uuid1()) + '.jpg')\n", + " with open(image_path, 'wb') as f:\n", + " f.write(image_w.value)\n", + "def save_dir():\n", + " global curr_dir, dir_count\n", + " save_snapshot(curr_dir)\n", + " dir_count.value = len(os.listdir(curr_dir))\n", + "def prev_dir():\n", + " global curr_class_no, curr_dir, no_of_classes\n", + " if curr_class_no>1:\n", + " curr_class_no-=1\n", + " curr_dir = os.path.join(dir_,'%s'%curr_class_no )\n", + " collecting_button.description = 'Collect Class ' + str(curr_class_no)\n", + " dir_count.value = len(os.listdir(curr_dir))\n", + " dir_count.continuous_update\n", + "def nxt_dir():\n", + " global curr_class_no, curr_dir, no_of_classes\n", + " if curr_class_no