-
Notifications
You must be signed in to change notification settings - Fork 0
/
nanti_kita_pelajari_tentang_ini.py
142 lines (126 loc) · 4.94 KB
/
nanti_kita_pelajari_tentang_ini.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
import cv2
import os
import matplotlib.pyplot as plt
import mxnet as mx
import numpy as np
from skimage import transform as trans
from mxnet.contrib.onnx.onnx2mx.import_model import import_model
def download_detection_model(source_url, dirname):
    '''
    Download the pretrained MTCNN face detection & alignment model files.

    A pretrained face detection & alignment model [MTCNN detector]
    (https://kpzhang93.github.io/MTCNN_face_detection_alignment/index.html)
    is used here as a part of the preprocessing step.
    The input image is passed through the detector to obtain an aligned image of the face
    in the input.
    The pretrained models reside in the folder `mtcnn-model` and the scripts
    `mtcnn_detector.py` and `helper.py` contain code for doing inference on those models.

    Parameters:
        source_url: base URL the `det1`..`det4` model files are served from.
        dirname: local directory the files are downloaded into.
    '''
    # One file of each kind per detector stage det1..det4.
    suffixes = ("-0001.params", "-symbol.json", ".caffemodel", ".prototxt")
    base = source_url.rstrip('/')
    for i in range(1, 5):
        for suffix in suffixes:
            # Build the URL with '/' explicitly: os.path.join would use the
            # platform separator (backslash on Windows) and corrupt the URL.
            mx.test_utils.download(dirname=dirname, url=f"{base}/det{i}{suffix}")
def get_recognition_model(ctx, model):
    '''
    Loads ONNX model into MXNet symbols and params,
    defines model using symbol file and binds parameters to the model using params file.

    Parameters:
        ctx: MXNet context (cpu/gpu) the module is bound on.
        model: path to the ONNX model file.

    Returns:
        A bound mx.mod.Module expecting a single (1, 3, 112, 112) 'data' input.
    '''
    input_height, input_width = 112, 112
    # Convert the ONNX graph into an MXNet symbol plus parameter dicts.
    sym, arg_params, aux_params = import_model(model)
    # Inference-only module: no labels, fixed single-image input shape.
    module = mx.mod.Module(symbol=sym, context=ctx, label_names=None)
    module.bind(data_shapes=[('data', (1, 3, input_height, input_width))])
    module.set_params(arg_params, aux_params)
    return module
def preprocess(img, bbox=None, landmark=None, **kwargs):
    '''
    Takes output of face detector (bounding box and landmark points for face in the image)
    as input and generates aligned face images.

    Parameters:
        img: source image array, indexed below as H x W x C.
        bbox: optional bounding box [x1, y1, x2, y2]; only used when no
              landmarks are supplied.
        landmark: optional array of 5 facial points shaped (5, 2) — the caller
                  `get_input` reshapes the detector output this way. When
                  present, the face is warped onto a canonical 5-point template.
        **kwargs:
            image_size: output size string 'H' or 'H,W'; H must be 112.
            margin: extra pixels added around the bounding-box crop (default 44).

    Returns:
        The warped face image when landmarks are given; otherwise a crop of
        `img` (resized to image_size when one was requested).
    '''
    M = None  # similarity-transform matrix; stays None on the crop-only path
    image_size = []
    str_image_size = kwargs.get('image_size', '')
    # Parse and validate the requested output size
    if len(str_image_size)>0:
        image_size = [int(x) for x in str_image_size.split(',')]
        if len(image_size)==1:
            # A single number means a square output
            image_size = [image_size[0], image_size[0]]
        assert len(image_size)==2
        assert image_size[0]==112
        # NOTE(review): this assert is redundant — the line above already forces
        # image_size[0]==112, so the `or image_size[1]==96` arm never constrains
        # anything. Presumably 112x96 outputs were meant to be allowed; confirm
        # against the upstream implementation before changing it.
        assert image_size[0]==112 or image_size[1]==96
    # Do alignment using landmark points
    if landmark is not None:
        assert len(image_size)==2
        # Reference 5-point face template (two eyes, nose tip, two mouth
        # corners); coordinates appear to target a 96-wide x 112-high frame —
        # TODO confirm against the upstream model's alignment spec.
        src = np.array([
          [30.2946, 51.6963],
          [65.5318, 51.5014],
          [48.0252, 71.7366],
          [33.5493, 92.3655],
          [62.7299, 92.2041] ], dtype=np.float32 )
        if image_size[1]==112:
            # Shift the template right to centre it in a 112-wide output
            src[:,0] += 8.0
        dst = landmark.astype(np.float32)
        # Estimate the similarity transform mapping detected points onto the template
        tform = trans.SimilarityTransform()
        tform.estimate(dst, src)
        M = tform.params[0:2,:]  # top 2 rows -> 2x3 affine matrix for warpAffine
        assert len(image_size)==2
        # cv2 size argument is (width, height), hence the swapped indices
        warped = cv2.warpAffine(img,M,(image_size[1],image_size[0]), borderValue = 0.0)
        return warped
    # If no landmark points available, do alignment using bounding box.
    # If no bounding box available use center crop
    if M is None:
        if bbox is None:
            # Synthetic box covering the central 87.5% of each dimension
            det = np.zeros(4, dtype=np.int32)
            det[0] = int(img.shape[1]*0.0625)
            det[1] = int(img.shape[0]*0.0625)
            det[2] = img.shape[1] - det[0]
            det[3] = img.shape[0] - det[1]
        else:
            det = bbox
        margin = kwargs.get('margin', 44)
        # Expand the box by margin/2 on every side, clamped to the image bounds
        bb = np.zeros(4, dtype=np.int32)
        bb[0] = np.maximum(det[0]-margin/2, 0)
        bb[1] = np.maximum(det[1]-margin/2, 0)
        bb[2] = np.minimum(det[2]+margin/2, img.shape[1])
        bb[3] = np.minimum(det[3]+margin/2, img.shape[0])
        ret = img[bb[1]:bb[3],bb[0]:bb[2],:]
        if len(image_size)>0:
            ret = cv2.resize(ret, (image_size[1], image_size[0]))
        return ret
def get_input(detector, face_img):
    '''
    Passes input images through the face detector, and returns aligned face images
    generated by `preprocess()`.

    Returns None when the detector finds nothing; otherwise a channel-first
    RGB array of the aligned face.
    '''
    detection = detector.detect_face(face_img, det_type=0)
    if detection is None:
        return None
    boxes, landmarks = detection
    if boxes.shape[0] == 0:
        # Detector ran but produced no face boxes.
        return None
    # Keep only the first detection: its box and its 5 landmark points
    # reshaped from a flat row into (5, 2) (x, y) pairs.
    first_box = boxes[0, 0:4]
    first_points = landmarks[0, :].reshape((2, 5)).T
    # Align the face, convert BGR -> RGB, then HWC -> CHW for the network.
    aligned_bgr = preprocess(face_img, first_box, first_points, image_size='112,112')
    aligned_rgb = cv2.cvtColor(aligned_bgr, cv2.COLOR_BGR2RGB)
    return np.transpose(aligned_rgb, (2, 0, 1))
def get_feature(model, aligned):
    '''
    Performs forward pass on the data `aligned` using `model` and returns the embedding.

    `aligned` is a single channel-first image; a leading batch axis is added
    before the forward pass.
    '''
    batch = mx.nd.array(np.expand_dims(aligned, axis=0))
    # Inference-only forward pass; no gradients are computed.
    model.forward(mx.io.DataBatch(data=(batch,)), is_train=False)
    return model.get_outputs()[0].asnumpy()
def display_image(input):
    '''Render the given image array in a matplotlib window.'''
    # NOTE: the parameter name shadows the `input` builtin; kept as-is to
    # preserve the public signature for keyword callers.
    plt.imshow(input)
    plt.show()