前面的文章中介绍了如何编译dlib静态库和动态库,那么下一步就是利用dlib完成一个最简单的识别图片中人脸的程序。该程序参考了一些网上最常用的例子。下面将测试的环境、代码、遇到的问题、解决方法一一列出,供大家参考,希望能给大家一些帮助。
测试平台:x86_64 + 银河麒麟(v10)
开发框架:Qt(5.15.2)
测试代码:
#include <exception>
#include <iostream>
#include <QApplication>
#include "dlib/image_processing/frontal_face_detector.h"
#include "dlib/gui_widgets.h"
#include "dlib/image_io.h"
using namespace dlib;
using namespace std;
int main(int argc, char *argv[])
{
auto detector = get_frontal_face_detector();
auto path = "/home/consys/20240315_204947_3.jpg"; //分辨率1280*720
image_window win;
array2d<unsigned char> img;
load_image(img, path); //通过路径加载图像
pyramid_up(img);
std::vector<rectangle> dets = detector(img); //存储人脸矩阵
cout << "number of faces detected:" << dets.size() << endl;
win.clear_overlay(); //清除缓存
win.set_image(img); //设置图像
win.add_overlay(dets, rgb_pixel(255, 0, 0)); //添加一个overlay
pause();
return 0;
}
测试效果:
这是利用dlib开发的第一个程序。编写完上述代码后直接构建运行,遇到了以下问题:
1.提示找不到dlib相关的符号
/usr/local/include/dlib/test_for_odr_violations.h:24: error: undefined reference to 'USER_ERROR__inconsistent_build_configuration__see_dlib_faq_1_'
/usr/local/include/dlib/test_for_odr_violations.h:51: error: undefined reference to 'DLIB_VERSION_MISMATCH_CHECK__EXPECTED_VERSION_19_24_99'
../face_dlib/main.cpp:18: error: undefined reference to 'dlib::image_window::image_window()'
../face_dlib/main.cpp:26: error: undefined reference to 'dlib::image_window::clear_overlay()'
../face_dlib/main.cpp:18: error: undefined reference to 'dlib::image_window::~image_window()'
../face_dlib/main.cpp:18: error: undefined reference to 'dlib::image_window::~image_window()'
/usr/local/include/dlib/test_for_odr_violations.h:24: error: undefined reference to 'USER_ERROR__inconsistent_build_configuration__see_dlib_faq_1_'
/usr/local/include/dlib/test_for_odr_violations.h:51: error: undefined reference to 'DLIB_VERSION_MISMATCH_CHECK__EXPECTED_VERSION_19_24_99'
/usr/local/include/dlib/matrix/lapack/gesvd.h:38: error: undefined reference to 'dgesvd_'
/usr/local/include/dlib/matrix/matrix_blas_bindings.h:148: error: undefined reference to 'cblas_saxpy'
/usr/local/include/dlib/matrix/matrix_blas_bindings.h:177: error: undefined reference to 'cblas_sscal'
/usr/local/include/dlib/image_processing/frontal_face_detector.h:115: error: undefined reference to 'dlib::base64::base64()'
/usr/local/include/dlib/image_processing/frontal_face_detector.h:2358: error: undefined reference to 'dlib::base64::decode(std::istream&, std::ostream&) const'
/usr/local/include/dlib/image_processing/frontal_face_detector.h:115: error: undefined reference to 'dlib::base64::~base64()'
/usr/local/include/dlib/image_processing/frontal_face_detector.h:115: error: undefined reference to 'dlib::base64::~base64()'
/usr/local/include/dlib/gui_widgets/style.h:527: error: undefined reference to 'dlib::draw_sunken_rectangle(dlib::canvas const&, dlib::rectangle const&, unsigned char)'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:180: error: undefined reference to 'dlib::entropy_decoder_kernel_2::entropy_decoder_kernel_2()'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:181: error: undefined reference to 'dlib::entropy_decoder_kernel_2::set_stream(std::istream&)'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:196: error: undefined reference to 'dlib::entropy_decoder_kernel_2::get_target(unsigned int)'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:201: error: undefined reference to 'dlib::entropy_decoder_kernel_2::decode(unsigned int, unsigned int)'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:180: error: undefined reference to 'dlib::entropy_decoder_kernel_2::~entropy_decoder_kernel_2()'
/usr/local/include/dlib/compress_stream/compress_stream_kernel_1.h:180: error: undefined reference to 'dlib::entropy_decoder_kernel_2::~entropy_decoder_kernel_2()'
/usr/local/include/dlib/gui_widgets/widgets.h:3980: error: undefined reference to 'dlib::image_window::add_overlay(std::vector<dlib::image_display::overlay_rect, std::allocator<dlib::image_display::overlay_rect> > const&)'
/usr/local/include/dlib/gui_widgets/widgets.h:3942: error: undefined reference to 'dlib::image_display::get_image_display_rect() const'
/usr/local/include/dlib/gui_widgets/widgets.h:3946: error: undefined reference to 'dlib::base_window::set_size(int, int)'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:422: error: undefined reference to 'dlib::entropy_decoder_kernel_2::get_target(unsigned int)'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:456: error: undefined reference to 'dlib::entropy_decoder_kernel_2::decode(unsigned int, unsigned int)'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:503: error: undefined reference to 'dlib::entropy_decoder_kernel_2::decode(unsigned int, unsigned int)'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:551: error: undefined reference to 'dlib::entropy_decoder_kernel_2::get_target(unsigned int)'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:553: error: undefined reference to 'dlib::entropy_decoder_kernel_2::decode(unsigned int, unsigned int)'
/usr/local/include/dlib/image_loader/png_loader.h:173: error: undefined reference to 'dlib::png_loader::png_loader(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)'
/usr/local/include/dlib/image_loader/jpeg_loader.h:100: error: undefined reference to 'dlib::jpeg_loader::jpeg_loader(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)'
/usr/local/include/dlib/image_loader/webp_loader.h:107: error: undefined reference to 'dlib::webp_loader::webp_loader(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&)'
/usr/local/include/dlib/gui_widgets/widgets.h:3277: error: undefined reference to 'dlib::scrollable_region::set_total_rect_size(unsigned long, unsigned long)'
/usr/local/include/dlib/gui_widgets/widgets.h:3279: error: undefined reference to 'dlib::scrollable_region::set_total_rect_size(unsigned long, unsigned long)'
/usr/local/include/dlib/gui_widgets/widgets.h:3283: error: undefined reference to 'dlib::base_window::invalidate_rectangle(dlib::rectangle const&)'
/usr/local/include/dlib/gui_widgets/widgets.h:3288: error: undefined reference to 'dlib::popup_menu_region::disable()'
/usr/local/include/dlib/image_loader/image_loader.h:550: error: undefined reference to 'dlib::entropy_decoder_kernel_2::entropy_decoder_kernel_2()'
/usr/local/include/dlib/image_loader/image_loader.h:551: error: undefined reference to 'dlib::entropy_decoder_kernel_2::set_stream(std::istream&)'
/usr/local/include/dlib/image_loader/image_loader.h:550: error: undefined reference to 'dlib::entropy_decoder_kernel_2::~entropy_decoder_kernel_2()'
/usr/local/include/dlib/image_loader/image_loader.h:736: error: undefined reference to 'dlib::entropy_decoder_kernel_2::entropy_decoder_kernel_2()'
/usr/local/include/dlib/image_loader/image_loader.h:737: error: undefined reference to 'dlib::entropy_decoder_kernel_2::set_stream(std::istream&)'
/usr/local/include/dlib/image_loader/image_loader.h:736: error: undefined reference to 'dlib::entropy_decoder_kernel_2::~entropy_decoder_kernel_2()'
/usr/local/include/dlib/image_loader/png_loader.h:123: error: undefined reference to 'dlib::png_loader::is_gray() const'
/usr/local/include/dlib/image_loader/png_loader.h:126: error: undefined reference to 'dlib::png_loader::is_graya() const'
/usr/local/include/dlib/image_loader/png_loader.h:129: error: undefined reference to 'dlib::png_loader::is_rgb() const'
/usr/local/include/dlib/image_loader/png_loader.h:132: error: undefined reference to 'dlib::png_loader::is_rgba() const'
/usr/local/include/dlib/image_loader/png_loader.h:123: error: undefined reference to 'dlib::png_loader::is_gray() const'
/usr/local/include/dlib/image_loader/png_loader.h:126: error: undefined reference to 'dlib::png_loader::is_graya() const'
/usr/local/include/dlib/image_loader/png_loader.h:129: error: undefined reference to 'dlib::png_loader::is_rgb() const'
/usr/local/include/dlib/image_loader/png_loader.h:132: error: undefined reference to 'dlib::png_loader::is_rgba() const'
/usr/local/include/dlib/image_loader/jpeg_loader.h:51: error: undefined reference to 'dlib::jpeg_loader::is_gray() const'
/usr/local/include/dlib/image_loader/jpeg_loader.h:56: error: undefined reference to 'dlib::jpeg_loader::is_rgba() const'
/usr/local/include/dlib/image_loader/webp_loader.h:73: error: undefined reference to 'dlib::webp_loader::read_rgba(unsigned char*, unsigned long, int) const'
/usr/local/include/dlib/entropy_decoder_model/entropy_decoder_model_kernel_5.h:422: error: undefined reference to 'dlib::entropy_decoder_kernel_2::get_target(unsigned int)'
/usr/local/include/dlib/test_for_odr_violations.h:24: error: undefined reference to 'USER_ERROR__inconsistent_build_configuration__see_dlib_faq_1_'
/usr/local/include/dlib/test_for_odr_violations.h:51: error: undefined reference to 'DLIB_VERSION_MISMATCH_CHECK__EXPECTED_VERSION_19_24_99'
问题原因:工程没有链接dlib库,链接器找不到dlib相关的符号
解决方法:在pro文件中添加 LIBS += -L/usr/local/lib -ldlib
2.提示找不到dgesvd_ cblas_saxpy cblas_sscal
/usr/local/include/dlib/matrix/lapack/gesvd.h:38: error: undefined reference to 'dgesvd_'
/usr/local/include/dlib/matrix/matrix_blas_bindings.h:148: error: undefined reference to 'cblas_saxpy'
/usr/local/include/dlib/matrix/matrix_blas_bindings.h:177: error: undefined reference to 'cblas_sscal'
问题原因:dlib依赖lapack和cblas,链接时需要显式指定这两个库及其所在的路径
解决方法:LIBS += -L/usr/lib/x86_64-linux-gnu -llapack -lcblas
3. 我自己的环境有两个Qt版本,一个是Qt 5.12.8,一个是Qt 5.15.2。5.12.8是系统自带的,libQt5Core.so在/usr/lib/x86_64-linux-gnu下;5.15.2是自己安装的,安装在/opt/Qt/5.15.2/gcc_64/lib目录下。我自己遇到的问题是LD_LIBRARY_PATH环境变量中的路径顺序有问题,原来的环境变量顺序是:/usr/lib/x86_64-linux-gnu:/opt/Qt/5.15.2/gcc_64/lib,这个顺序会造成程序运行时先找到/usr/lib/x86_64-linux-gnu下的libQt5Core.so(5.12.8),但是自己的工程配置的是5.15.2,这样会造成Qt版本不匹配,找不到Qt_5.15这个符号版本,错误提示如下:
/usr/lib/x86_64-linux-gnu/libQt5Core.so.5: version `Qt_5.15' not found (required by /home/consys/qt_project/build-face_dlib-Desktop_Qt_5_15_2_GCC_64bit-Debug/face_dlib)
解决方法:调整LD_LIBRARY_PATH的顺序即可。
/opt/Qt/5.15.2/gcc_64/lib:/usr/lib/x86_64-linux-gnu
待优化问题:
1、图片中人脸识别时间特别长(20多秒),这明显不合理,需要回答几个问题:该代码通过什么方式进行人脸识别的?CNN?还是DNN?还是其它的技术?这个时间长度与图片分辨率是否有关系?将分辨率降为640*480后,时间明显缩短至10s左右,但是时间还是太长,需要优化识别时间。
2、分析如果图像中人脸小于多少像素就不能识别?HOG算法、CNN与DNN模型在这方面各有什么限制,原理是什么?据我所知,部分模型训练时是按照80*80的人脸去训练的,人脸小于这个大小时,有的模型无法识别,需要进行进一步的分析确认。
问题解决:
待优化问题中提到了几个问题,有些问题已经解决,下面将可能遇到的问题一一进行解决。
1. 单张图片人脸识别(detector(img))语句耗时特别长,时间长达20多秒。
答:这种情况必然是有问题的,因为如果是这样的耗时,则无法实现实时人脸识别,这明显是不合理的。查找该问题时我尝试了以下方法:(1)重新编译dlib,添加了-DUSE_AVX_INSTRUCTIONS=1 -DUSE_SSE2_INSTRUCTIONS=1编译选项,其中SSE2或者SSE4至少是需要打开的,AVX根据实际情况分析,若CPU支持avx指令集则应该配置上,若不支持,则无需配置。(2)关闭cuda选项,-DDLIB_USE_CUDA=0,此处需要再确认下打开与关闭的影响。(3)我的应用程序编译的是debug模式的,然而通过查看dlib/CMakeLists.txt,发现dlib默认的编译模式是release模式,这种情况下运行就是20多秒;然后当我将应用程序改成release模式后,dlib检测人脸时间降至100ms左右;再尝试使用dnn模式检测人脸后,同样的图片检测时间降至50ms左右。由此可见,在使用dlib进行人脸识别时一定要注意应用程序的编译模式是否与库的编译模式匹配(dlib的大量代码是头文件中的模板,会随应用程序一起编译,因此应用程序不开优化时检测会非常慢)。
HOG算法检测人脸时间(hog算法,图片中只有一张人脸,图片分辨率:640*480):
采用DNN深度学习来检测人脸时间(dnn算法,图片中只有一张人脸,图片分辨率:640*480):
采用DNN深度学习算法来检测人脸耗时(dnn算法,图片中有24张人脸,图片分辨率:1986*1545):
结论:
- 图像中人脸越多耗时越长
- 图像分辨率越大,耗时越长
HOG算法检测人脸+检测图片分辨率(640*480)源码:
#include <iostream>
#include "dlib/image_processing/frontal_face_detector.h"
#include "dlib/gui_widgets.h"
#include "dlib/image_io.h"
#include <QDateTime>
#include <QDebug>using namespace dlib;
using namespace std;int main(int argc, char *argv[])
{auto detector = get_frontal_face_detector();auto path = "/home/consys/640_480_single_face.png";image_window win;array2d<unsigned char> img;load_image(img, path); //通过路径加载图像pyramid_up(img);qDebug()<<"detector begin: "<<QDateTime::currentMSecsSinceEpoch();std::vector<rectangle> dets = detector(img); //存储人脸矩阵qDebug()<<"detector end: "<<QDateTime::currentMSecsSinceEpoch();cout << "number of faces detected:" << dets.size()<< endl;win.clear_overlay(); //清除缓存win.set_image(img); //设置图像win.add_overlay(dets, rgb_pixel(255, 0, 0)); //添加一个overlaypause();return 0;
}
DNN算法采用的是dlib提供的example用例(dnn_face_recognition_ex.cpp):
// The contents of this file are in the public domain. See LICENSE_FOR_EXAMPLE_PROGRAMS.txt
/*
    This is an example illustrating the use of the deep learning tools from the dlib C++
    Library. In it, we will show how to do face recognition. This example uses the
    pretrained dlib_face_recognition_resnet_model_v1 model which is freely available from
    the dlib web site. This model has a 99.38% accuracy on the standard LFW face
    recognition benchmark, which is comparable to other state-of-the-art methods for face
    recognition as of February 2017.

    In this example, we will use dlib to do face clustering. Included in the examples
    folder is an image, bald_guys.jpg, which contains a bunch of photos of action movie
    stars Vin Diesel, The Rock, Jason Statham, and Bruce Willis. We will use dlib to
    automatically find their faces in the image and then to automatically determine how
    many people there are (4 in this case) as well as which faces belong to each person.

    Finally, this example uses a network with the loss_metric loss. Therefore, if you want
    to learn how to train your own models, or to get a general introduction to this loss
    layer, you should read the dnn_metric_learning_ex.cpp and
    dnn_metric_learning_on_images_ex.cpp examples.
*/

#include <dlib/dnn.h>
#include <dlib/gui_widgets.h>
#include <dlib/clustering.h>
#include <dlib/string.h>
#include <dlib/image_io.h>
#include <dlib/image_processing/frontal_face_detector.h>
#include <QDebug>
#include <QDateTime>

using namespace dlib;
using namespace std;

// ----------------------------------------------------------------------------------------

// The next bit of code defines a ResNet network. It's basically copied
// and pasted from the dnn_imagenet_ex.cpp example, except we replaced the loss
// layer with loss_metric and made the network somewhat smaller. Go read the introductory
// dlib DNN examples to learn what all this stuff means.
//
// Also, the dnn_metric_learning_on_images_ex.cpp example shows how to train this network.
// The dlib_face_recognition_resnet_model_v1 model used by this example was trained using
// essentially the code shown in dnn_metric_learning_on_images_ex.cpp except the
// mini-batches were made larger (35x15 instead of 5x5), the iterations without progress
// was set to 10000, and the training dataset consisted of about 3 million images instead of
// 55. Also, the input layer was locked to images of size 150.
template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual = add_prev1<block<N,BN,1,tag1<SUBNET>>>;

template <template <int,template<typename>class,int,typename> class block, int N, template<typename>class BN, typename SUBNET>
using residual_down = add_prev2<avg_pool<2,2,2,2,skip1<tag2<block<N,BN,2,tag1<SUBNET>>>>>>;

template <int N, template <typename> class BN, int stride, typename SUBNET>
using block = BN<con<N,3,3,1,1,relu<BN<con<N,3,3,stride,stride,SUBNET>>>>>;

template <int N, typename SUBNET> using ares = relu<residual<block,N,affine,SUBNET>>;
template <int N, typename SUBNET> using ares_down = relu<residual_down<block,N,affine,SUBNET>>;

template <typename SUBNET> using alevel0 = ares_down<256,SUBNET>;
template <typename SUBNET> using alevel1 = ares<256,ares<256,ares_down<256,SUBNET>>>;
template <typename SUBNET> using alevel2 = ares<128,ares<128,ares_down<128,SUBNET>>>;
template <typename SUBNET> using alevel3 = ares<64,ares<64,ares<64,ares_down<64,SUBNET>>>>;
template <typename SUBNET> using alevel4 = ares<32,ares<32,ares<32,SUBNET>>>;

using anet_type = loss_metric<fc_no_bias<128,avg_pool_everything<
                            alevel0<
                            alevel1<
                            alevel2<
                            alevel3<
                            alevel4<
                            max_pool<3,3,2,2,relu<affine<con<32,7,7,2,2,
                            input_rgb_image_sized<150>
                            >>>>>>>>>>>>;

// ----------------------------------------------------------------------------------------

std::vector<matrix<rgb_pixel>> jitter_image(
    const matrix<rgb_pixel>& img
);

// ----------------------------------------------------------------------------------------

int main(int argc, char** argv) try
{
    if (argc != 2)
    {
        cout << "Run this example by invoking it like this: " << endl;
        cout << " ./dnn_face_recognition_ex faces/bald_guys.jpg" << endl;
        cout << endl;
        cout << "You will also need to get the face landmarking model file as well as " << endl;
        cout << "the face recognition model file. Download and then decompress these files from: " << endl;
        cout << "http://dlib.net/files/shape_predictor_5_face_landmarks.dat.bz2" << endl;
        cout << "http://dlib.net/files/dlib_face_recognition_resnet_model_v1.dat.bz2" << endl;
        cout << endl;
        return 1;
    }

    // The first thing we are going to do is load all our models. First, since we need to
    // find faces in the image we will need a face detector:
    frontal_face_detector detector = get_frontal_face_detector();
    // We will also use a face landmarking model to align faces to a standard pose: (see face_landmark_detection_ex.cpp for an introduction)
    shape_predictor sp;
    deserialize("shape_predictor_5_face_landmarks.dat") >> sp;
    // And finally we load the DNN responsible for face recognition.
    anet_type net;
    deserialize("dlib_face_recognition_resnet_model_v1.dat") >> net;

    matrix<rgb_pixel> img;
    load_image(img, argv[1]);
    // Display the raw image on the screen
    image_window win(img);

    // Run the face detector on the image of our action heroes, and for each face extract a
    // copy that has been normalized to 150x150 pixels in size and appropriately rotated
    // and centered.
    std::vector<matrix<rgb_pixel>> faces;
    // Timing instrumentation: the two timestamps bracket the whole loop, so the
    // measured span covers the detector call plus landmarking and chip extraction
    // for every detected face, not the detector call alone.
    qDebug()<<"detector begin time : "<<QDateTime::currentMSecsSinceEpoch();
    for (auto face : detector(img))
    {
        auto shape = sp(img, face);
        matrix<rgb_pixel> face_chip;
        extract_image_chip(img, get_face_chip_details(shape,150,0.25), face_chip);
        faces.push_back(move(face_chip));
        // Also put some boxes on the faces so we can see that the detector is finding
        // them.
        win.add_overlay(face);
    }
    qDebug()<<"detector end time : "<<QDateTime::currentMSecsSinceEpoch()<<faces.size();

    if (faces.size() == 0)
    {
        cout << "No faces found in image!" << endl;
        return 1;
    }

    // This call asks the DNN to convert each face image in faces into a 128D vector.
    // In this 128D vector space, images from the same person will be close to each other
    // but vectors from different people will be far apart. So we can use these vectors to
    // identify if a pair of images are from the same person or from different people.
    std::vector<matrix<float,0,1>> face_descriptors = net(faces);

    // In particular, one simple thing we can do is face clustering. This next bit of code
    // creates a graph of connected faces and then uses the Chinese whispers graph clustering
    // algorithm to identify how many people there are and which faces belong to whom.
    std::vector<sample_pair> edges;
    for (size_t i = 0; i < face_descriptors.size(); ++i)
    {
        for (size_t j = i; j < face_descriptors.size(); ++j)
        {
            // Faces are connected in the graph if they are close enough. Here we check if
            // the distance between two face descriptors is less than 0.6, which is the
            // decision threshold the network was trained to use. Although you can
            // certainly use any other threshold you find useful.
            if (length(face_descriptors[i]-face_descriptors[j]) < 0.6)
                edges.push_back(sample_pair(i,j));
        }
    }
    std::vector<unsigned long> labels;
    const auto num_clusters = chinese_whispers(edges, labels);
    // This will correctly indicate that there are 4 people in the image.
    cout << "number of people found in the image: "<< num_clusters << endl;

    // Now let's display the face clustering results on the screen. You will see that it
    // correctly grouped all the faces.
    std::vector<image_window> win_clusters(num_clusters);
    for (size_t cluster_id = 0; cluster_id < num_clusters; ++cluster_id)
    {
        std::vector<matrix<rgb_pixel>> temp;
        for (size_t j = 0; j < labels.size(); ++j)
        {
            if (cluster_id == labels[j])
                temp.push_back(faces[j]);
        }
        win_clusters[cluster_id].set_title("face cluster " + cast_to_string(cluster_id));
        win_clusters[cluster_id].set_image(tile_images(temp));
    }

    // Finally, let's print one of the face descriptors to the screen.
    cout << "face descriptor for one face: " << trans(face_descriptors[0]) << endl;

    // It should also be noted that face recognition accuracy can be improved if jittering
    // is used when creating face descriptors. In particular, to get 99.38% on the LFW
    // benchmark you need to use the jitter_image() routine to compute the descriptors,
    // like so:
    matrix<float,0,1> face_descriptor = mean(mat(net(jitter_image(faces[0]))));
    cout << "jittered face descriptor for one face: " << trans(face_descriptor) << endl;
    // If you use the model without jittering, as we did when clustering the bald guys, it
    // gets an accuracy of 99.13% on the LFW benchmark. So jittering makes the whole
    // procedure a little more accurate but makes face descriptor calculation slower.

    cout << "hit enter to terminate" << endl;
    cin.get();
}
catch (std::exception& e)
{
    cout << e.what() << endl;
}

// ----------------------------------------------------------------------------------------

std::vector<matrix<rgb_pixel>> jitter_image(
    const matrix<rgb_pixel>& img
)
{
    // All this function does is make 100 copies of img, all slightly jittered by being
    // zoomed, rotated, and translated a little bit differently. They are also randomly
    // mirrored left to right.
    thread_local dlib::rand rnd;

    std::vector<matrix<rgb_pixel>> crops;
    for (int i = 0; i < 100; ++i)
        crops.push_back(jitter_image(img,rnd));

    return crops;
}

// ----------------------------------------------------------------------------------------