The original tutorial is PyImageSearch's "OpenCV Face Recognition"; the author is Adrian Rosebrock.
If you want to learn this material properly, I recommend working through the original tutorial; this post is just my own notes.
In this tutorial, you will learn how to implement face recognition with OpenCV. Building a face recognition system requires the following steps:
- Face Detection: locate the faces in a given image;
- Extract face embeddings: use a deep learning model to compute an embedding for each face found in the previous step;
- Train a face recognition model: train an SVM classifier on the face embeddings;
- Recognize faces: recognize the detected faces in images or video.
The sections below walk through each of these steps.
1. Face Detection and Face Embedding Extraction
This step has two parts, face detection and face embedding extraction, both of which rely on deep learning. First, a pre-trained face detection model localizes the faces in the input image. Each detected face is then fed into an embedding model, which outputs a 128-dimensional vector (the embedding) for that face. These vectors are used in the next step to train an SVM classifier for face recognition.
```python
# Face detection model: Caffe
#   ./face_detection_model/deploy.prototxt
#   ./face_detection_model/res10_300x300_ssd_iter_140000.caffemodel
# Loaded with:
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# Face embedding model: Torch
#   ./openface_nn4.small2.v1.t7
# Loaded with:
embedder = cv2.dnn.readNetFromTorch(embedding_model)
```
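Before wiring everything together, it can help to check what these two networks actually return. The following is a minimal sanity-check sketch (my addition, not from the tutorial; `test.jpg` is a placeholder path): the detector outputs a `(1, 1, N, 7)` array where, for candidate box `i`, `detections[0, 0, i, 2]` is the confidence and `detections[0, 0, i, 3:7]` is the box in relative coordinates, while the embedder outputs a `(1, 128)` vector.

```python
# Sanity-check sketch (not part of the original tutorial).
# Assumes the model files sit at the paths shown above and that
# "test.jpg" (a placeholder) is a readable BGR image.
import cv2

detector = cv2.dnn.readNetFromCaffe(
    "face_detection_model/deploy.prototxt",
    "face_detection_model/res10_300x300_ssd_iter_140000.caffemodel")
embedder = cv2.dnn.readNetFromTorch("openface_nn4.small2.v1.t7")

image = cv2.imread("test.jpg")
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0,
    (300, 300), (104.0, 177.0, 123.0), swapRB=False, crop=False)
detector.setInput(blob)
detections = detector.forward()
print(detections.shape)  # (1, 1, N, 7): N candidate boxes, 7 values each

# feed a 96x96 blob (here the whole image, just to check the output shape)
faceBlob = cv2.dnn.blobFromImage(cv2.resize(image, (96, 96)), 1.0 / 255,
    (96, 96), (0, 0, 0), swapRB=True, crop=False)
embedder.setInput(faceBlob)
print(embedder.forward().shape)  # (1, 128): the face embedding
```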
Full code:
```python
# USAGE
# python extract_embeddings.py --dataset dataset --embeddings output/embeddings.pickle \
#     --detector face_detection_model --embedding-model openface_nn4.small2.v1.t7

# import the necessary packages
from imutils import paths
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--dataset", required=True,
    help="path to input directory of faces + images")
ap.add_argument("-e", "--embeddings", required=True,
    help="path to output serialized db of facial embeddings")
ap.add_argument("-d", "--detector", required=True,
    help="path to OpenCV's deep learning face detector")
ap.add_argument("-m", "--embedding-model", required=True,
    help="path to OpenCV's deep learning face embedding model")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = os.path.sep.join([args["detector"], "deploy.prototxt"])
modelPath = os.path.sep.join([args["detector"],
    "res10_300x300_ssd_iter_140000.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load our serialized face embedding model from disk
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(args["embedding_model"])

# grab the paths to the input images in our dataset
print("[INFO] quantifying faces...")
imagePaths = list(paths.list_images(args["dataset"]))

# initialize our lists of extracted facial embeddings and
# corresponding people names
knownEmbeddings = []
knownNames = []

# initialize the total number of faces processed
total = 0

# loop over the image paths
for (i, imagePath) in enumerate(imagePaths):
    # extract the person name from the image path
    print("[INFO] processing image {}/{}".format(i + 1, len(imagePaths)))
    name = imagePath.split(os.path.sep)[-2]

    # load the image, resize it to have a width of 600 pixels (while
    # maintaining the aspect ratio), and then grab the image dimensions
    image = cv2.imread(imagePath)
    image = imutils.resize(image, width=600)
    (h, w) = image.shape[:2]

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(
        cv2.resize(image, (300, 300)), 1.0, (300, 300),
        (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize
    # faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # ensure at least one face was found
    if len(detections) > 0:
        # we're making the assumption that each image has only ONE
        # face, so find the bounding box with the largest probability
        i = np.argmax(detections[0, 0, :, 2])
        confidence = detections[0, 0, i, 2]

        # ensure that the detection with the largest probability also
        # meets our minimum probability test (thus helping filter out
        # weak detections)
        if confidence > args["confidence"]:
            # compute the (x, y)-coordinates of the bounding box for
            # the face
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # extract the face ROI and grab the ROI dimensions
            face = image[startY:endY, startX:endX]
            (fH, fW) = face.shape[:2]

            # ensure the face width and height are sufficiently large
            if fW < 20 or fH < 20:
                continue

            # construct a blob for the face ROI, then pass the blob
            # through our face embedding model to obtain the 128-d
            # quantification of the face
            faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255, (96, 96),
                (0, 0, 0), swapRB=True, crop=False)
            embedder.setInput(faceBlob)
            vec = embedder.forward()

            # add the name of the person + corresponding face
            # embedding to their respective lists
            knownNames.append(name)
            knownEmbeddings.append(vec.flatten())
            total += 1

# dump the facial embeddings + names to disk
print("[INFO] serializing {} encodings...".format(total))
data = {"embeddings": knownEmbeddings, "names": knownNames}
f = open(args["embeddings"], "wb")
f.write(pickle.dumps(data))
f.close()
```
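After running the script, you can quickly verify what was serialized. A small sketch (my addition, not from the tutorial; assumes the `output/embeddings.pickle` path from the usage line above):

```python
# Inspect the serialized embeddings (sketch, not from the tutorial).
import pickle
from collections import Counter

data = pickle.loads(open("output/embeddings.pickle", "rb").read())
print("total embeddings:", len(data["embeddings"]))
print("embedding length:", len(data["embeddings"][0]))  # should be 128
print("faces per person:", Counter(data["names"]))
```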
2. Train a Face Recognition Model
This step trains an SVM classifier for face recognition; the training data are the embeddings and labels produced in the previous step.
```python
# USAGE
# python train_model.py --embeddings output/embeddings.pickle \
#     --recognizer output/recognizer.pickle --le output/le.pickle

# import the necessary packages
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC
import argparse
import pickle

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-e", "--embeddings", required=True,
    help="path to serialized db of facial embeddings")
ap.add_argument("-r", "--recognizer", required=True,
    help="path to output model trained to recognize faces")
ap.add_argument("-l", "--le", required=True,
    help="path to output label encoder")
args = vars(ap.parse_args())

# load the face embeddings
print("[INFO] loading face embeddings...")
data = pickle.loads(open(args["embeddings"], "rb").read())

# encode the labels
print("[INFO] encoding labels...")
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# train the model used to accept the 128-d embeddings of the face and
# then produce the actual face recognition
print("[INFO] training model...")
recognizer = SVC(C=1.0, kernel="linear", probability=True)
recognizer.fit(data["embeddings"], labels)

# write the actual face recognition model to disk
f = open(args["recognizer"], "wb")
f.write(pickle.dumps(recognizer))
f.close()

# write the label encoder to disk
f = open(args["le"], "wb")
f.write(pickle.dumps(le))
f.close()
```
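The script above trains on all of the embeddings. To get a rough idea of how well the classifier generalizes, one option (my addition, not part of the tutorial) is to hold out a portion of the data and score it; this assumes several images per person so the stratified split works:

```python
# Optional hold-out evaluation (sketch, not from the tutorial).
import pickle
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.svm import SVC

data = pickle.loads(open("output/embeddings.pickle", "rb").read())
le = LabelEncoder()
labels = le.fit_transform(data["names"])

# keep 25% of the embeddings aside for testing, stratified by person
(trainX, testX, trainY, testY) = train_test_split(
    data["embeddings"], labels, test_size=0.25, random_state=42,
    stratify=labels)
model = SVC(C=1.0, kernel="linear", probability=True)
model.fit(trainX, trainY)
print(classification_report(testY, model.predict(testX),
    target_names=le.classes_))
```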
3. Recognize Face via Image
This is the testing step: face recognition on a single image. It requires the face detection model, the face embedding model, and the trained SVM classifier.
```python
# USAGE
# python recognize.py --detector face_detection_model \
#     --embedding-model openface_nn4.small2.v1.t7 \
#     --recognizer output/recognizer.pickle \
#     --le output/le.pickle --image images/adrian.jpg

# import the necessary packages
import numpy as np
import argparse
import imutils
import pickle
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-i", "--image", required=True,
    help="path to input image")
ap.add_argument("-d", "--detector", required=True,
    help="path to OpenCV's deep learning face detector")
ap.add_argument("-m", "--embedding-model", required=True,
    help="path to OpenCV's deep learning face embedding model")
ap.add_argument("-r", "--recognizer", required=True,
    help="path to model trained to recognize faces")
ap.add_argument("-l", "--le", required=True,
    help="path to label encoder")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = os.path.sep.join([args["detector"], "deploy.prototxt"])
modelPath = os.path.sep.join([args["detector"],
    "res10_300x300_ssd_iter_140000.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load our serialized face embedding model from disk
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(args["embedding_model"])

# load the actual face recognition model along with the label encoder
recognizer = pickle.loads(open(args["recognizer"], "rb").read())
le = pickle.loads(open(args["le"], "rb").read())

# load the image, resize it to have a width of 600 pixels (while
# maintaining the aspect ratio), and then grab the image dimensions
image = cv2.imread(args["image"])
image = imutils.resize(image, width=600)
(h, w) = image.shape[:2]

# construct a blob from the image
imageBlob = cv2.dnn.blobFromImage(
    cv2.resize(image, (300, 300)), 1.0, (300, 300),
    (104.0, 177.0, 123.0), swapRB=False, crop=False)

# apply OpenCV's deep learning-based face detector to localize
# faces in the input image
detector.setInput(imageBlob)
detections = detector.forward()

# loop over the detections
for i in range(0, detections.shape[2]):
    # extract the confidence (i.e., probability) associated with the
    # prediction
    confidence = detections[0, 0, i, 2]

    # filter out weak detections
    if confidence > args["confidence"]:
        # compute the (x, y)-coordinates of the bounding box for the
        # face
        box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
        (startX, startY, endX, endY) = box.astype("int")

        # extract the face ROI
        face = image[startY:endY, startX:endX]
        (fH, fW) = face.shape[:2]

        # ensure the face width and height are sufficiently large
        if fW < 20 or fH < 20:
            continue

        # construct a blob for the face ROI, then pass the blob
        # through our face embedding model to obtain the 128-d
        # quantification of the face
        faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255, (96, 96),
            (0, 0, 0), swapRB=True, crop=False)
        embedder.setInput(faceBlob)
        vec = embedder.forward()

        # perform classification to recognize the face
        preds = recognizer.predict_proba(vec)[0]
        j = np.argmax(preds)
        proba = preds[j]
        name = le.classes_[j]

        # draw the bounding box of the face along with the associated
        # probability
        text = "{}: {:.2f}%".format(name, proba * 100)
        y = startY - 10 if startY - 10 > 10 else startY + 10
        cv2.rectangle(image, (startX, startY), (endX, endY),
            (0, 0, 255), 2)
        cv2.putText(image, text, (startX, y),
            cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

# show the output image
cv2.imshow("Image", image)
cv2.waitKey(0)
```
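Note that `recognizer.predict_proba` always picks *some* known person, even for a face that belongs to nobody in the dataset. A common tweak, sketched below as a hypothetical helper (my addition; the 0.6 cutoff is an assumption to tune on your own data), is to fall back to "unknown" when the best class probability is low:

```python
import numpy as np

def classify_face(recognizer, le, vec, threshold=0.6):
    # Return (name, probability); fall back to "unknown" when the SVM's
    # best class probability is below the (hypothetical) threshold.
    preds = recognizer.predict_proba(vec)[0]
    j = np.argmax(preds)
    proba = float(preds[j])
    name = le.classes_[j] if proba >= threshold else "unknown"
    return (name, proba)
```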
4. Recognize Face via Video
Face recognition on a video stream works much like single-image recognition and again requires the face detection model, the embedding model, and the SVM classifier.
```python
# USAGE
# python recognize_video.py --detector face_detection_model \
#     --embedding-model openface_nn4.small2.v1.t7 \
#     --recognizer output/recognizer.pickle \
#     --le output/le.pickle

# import the necessary packages
from imutils.video import VideoStream
from imutils.video import FPS
import numpy as np
import argparse
import imutils
import pickle
import time
import cv2
import os

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("-d", "--detector", required=True,
    help="path to OpenCV's deep learning face detector")
ap.add_argument("-m", "--embedding-model", required=True,
    help="path to OpenCV's deep learning face embedding model")
ap.add_argument("-r", "--recognizer", required=True,
    help="path to model trained to recognize faces")
ap.add_argument("-l", "--le", required=True,
    help="path to label encoder")
ap.add_argument("-c", "--confidence", type=float, default=0.5,
    help="minimum probability to filter weak detections")
args = vars(ap.parse_args())

# load our serialized face detector from disk
print("[INFO] loading face detector...")
protoPath = os.path.sep.join([args["detector"], "deploy.prototxt"])
modelPath = os.path.sep.join([args["detector"],
    "res10_300x300_ssd_iter_140000.caffemodel"])
detector = cv2.dnn.readNetFromCaffe(protoPath, modelPath)

# load our serialized face embedding model from disk
print("[INFO] loading face recognizer...")
embedder = cv2.dnn.readNetFromTorch(args["embedding_model"])

# load the actual face recognition model along with the label encoder
recognizer = pickle.loads(open(args["recognizer"], "rb").read())
le = pickle.loads(open(args["le"], "rb").read())

# initialize the video stream, then allow the camera sensor to warm up
print("[INFO] starting video stream...")
vs = VideoStream(src=0).start()
time.sleep(2.0)

# start the FPS throughput estimator
fps = FPS().start()

# loop over frames from the video file stream
while True:
    # grab the frame from the threaded video stream
    frame = vs.read()

    # resize the frame to have a width of 600 pixels (while
    # maintaining the aspect ratio), and then grab the image
    # dimensions
    frame = imutils.resize(frame, width=600)
    (h, w) = frame.shape[:2]

    # construct a blob from the image
    imageBlob = cv2.dnn.blobFromImage(
        cv2.resize(frame, (300, 300)), 1.0, (300, 300),
        (104.0, 177.0, 123.0), swapRB=False, crop=False)

    # apply OpenCV's deep learning-based face detector to localize
    # faces in the input image
    detector.setInput(imageBlob)
    detections = detector.forward()

    # loop over the detections
    for i in range(0, detections.shape[2]):
        # extract the confidence (i.e., probability) associated with
        # the prediction
        confidence = detections[0, 0, i, 2]

        # filter out weak detections
        if confidence > args["confidence"]:
            # compute the (x, y)-coordinates of the bounding box for
            # the face
            box = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # extract the face ROI
            face = frame[startY:endY, startX:endX]
            (fH, fW) = face.shape[:2]

            # ensure the face width and height are sufficiently large
            if fW < 20 or fH < 20:
                continue

            # construct a blob for the face ROI, then pass the blob
            # through our face embedding model to obtain the 128-d
            # quantification of the face
            faceBlob = cv2.dnn.blobFromImage(face, 1.0 / 255,
                (96, 96), (0, 0, 0), swapRB=True, crop=False)
            embedder.setInput(faceBlob)
            vec = embedder.forward()

            # perform classification to recognize the face
            preds = recognizer.predict_proba(vec)[0]
            j = np.argmax(preds)
            proba = preds[j]
            name = le.classes_[j]

            # draw the bounding box of the face along with the
            # associated probability
            text = "{}: {:.2f}%".format(name, proba * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(frame, (startX, startY), (endX, endY),
                (0, 0, 255), 2)
            cv2.putText(frame, text, (startX, y),
                cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

    # update the FPS counter
    fps.update()

    # show the output frame
    cv2.imshow("Frame", frame)
    key = cv2.waitKey(1) & 0xFF

    # if the `q` key was pressed, break from the loop
    if key == ord("q"):
        break

# stop the timer and display FPS information
fps.stop()
print("[INFO] elapsed time: {:.2f}".format(fps.elapsed()))
print("[INFO] approx. FPS: {:.2f}".format(fps.fps()))

# do a bit of cleanup
cv2.destroyAllWindows()
vs.stop()
```
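This script reads from the webcam via `VideoStream(src=0)`. To run the same pipeline over a video file instead, a minimal sketch (my addition, not from the tutorial; `input.mp4` is a placeholder path) using `cv2.VideoCapture`:

```python
# Video-file variant (sketch, not from the tutorial).
import cv2

cap = cv2.VideoCapture("input.mp4")  # placeholder path
while True:
    (grabbed, frame) = cap.read()
    if not grabbed:  # end of the file
        break
    # ... run the same detection / embedding / classification
    # code on `frame` as in the loop above ...
    cv2.imshow("Frame", frame)
    if cv2.waitKey(1) & 0xFF == ord("q"):
        break
cap.release()
cv2.destroyAllWindows()
```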