00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029 #include <cvaux.h>
00030 #include <image_transport/image_transport.h>
00031 #include <opencv2/highgui/highgui.hpp>
00032 #include <sys/stat.h>
00033 #include <ros/package.h>
00034
// File-scope using-directive: the unqualified `string` and `vector` names used
// by FaceRecognitionLib below rely on it.
00035 using namespace std;
// File name of the Haar cascade that the FaceRecognitionLib constructor hands to cvLoad().
00036 static const char *faceCascadeFilename = "haarcascade_frontalface_alt.xml";
// Location of the "face_recognition" ROS package as reported by ros::package::getPath();
// the FaceRecognitionLib constructor chdir()s into it before loading the cascade.
00037 const std::string path = ros::package::getPath("face_recognition");
00038
00039 class FaceRecognitionLib
00040 {
00041 public:
00042
00043 int SAVE_EIGENFACE_IMAGES;
00044 IplImage ** faceImgArr;
00045 vector<string> personNames;
00046 int faceWidth;
00047 int faceHeight;
00048 int nPersons;
00049 int nTrainFaces;
00050 int nEigens;
00051 IplImage * pAvgTrainImg;
00052 IplImage ** eigenVectArr;
00053 CvMat * eigenValMat;
00054 CvMat * projectedTrainFaceMat;
00055 CvHaarClassifierCascade* faceCascade;
00056 CvMat * trainPersonNumMat;
00057 bool database_updated;
00058
00059 bool learn(const char *szFileTrain);
00060 void doPCA();
00061 void storeTrainingData();
00062 int loadTrainingData(CvMat ** pTrainPersonNumMat);
00063 int findNearestNeighbor(float * projectedTestFace);
00064 int findNearestNeighbor(float * projectedTestFace, float *pConfidence);
00065 int loadFaceImgArray(const char * filename);
00066 void storeEigenfaceImages();
00067 IplImage* convertImageToGreyscale(const IplImage *imageSrc);
00068 IplImage* cropImage(const IplImage *img, const CvRect region);
00069 IplImage* resizeImage(const IplImage *origImg, int newWidth, int newHeight);
00070 IplImage* convertFloatImageToUcharImage(const IplImage *srcImg);
00071 CvRect detectFaceInImage(const IplImage *inputImg, const CvHaarClassifierCascade* cascade );
00072 bool retrainOnline(void);
00073 FaceRecognitionLib()
00074 {
00075 SAVE_EIGENFACE_IMAGES = 1;
00076 faceImgArr= 0;
00077 faceWidth = 120;
00078 faceHeight = 90;
00079 nPersons = 0;
00080 nTrainFaces = 0;
00081 nEigens = 0;
00082 pAvgTrainImg = 0;
00083 eigenVectArr = 0;
00084 eigenValMat = 0;
00085 projectedTrainFaceMat = 0;
00086 database_updated = false;
00087
00089 chdir(path.c_str());
00090
00091
00092 faceCascade = (CvHaarClassifierCascade*)cvLoad(faceCascadeFilename, 0, 0, 0 );
00093 if( !faceCascade )
00094 {
00095 ROS_INFO("Could not load Haar cascade Face detection classifier in '%s'.", faceCascadeFilename);
00096 exit(1);
00097 }
00098
00099 mkdir("data",S_IRWXU | S_IRWXG | S_IRWXO);
00100
00101 trainPersonNumMat = 0;
00102 if( loadTrainingData( &trainPersonNumMat ) )
00103 {
00104 faceWidth = pAvgTrainImg->width;
00105 faceHeight = pAvgTrainImg->height;
00106 database_updated=true;
00107 }
00108 else
00109 {
00110 ROS_INFO("Will try to train from images");
00111 if(!retrainOnline())
00112 ROS_INFO("Could not train from images");
00113
00114 }
00115 }
00116 ~FaceRecognitionLib(void)
00117 {
00118 cvReleaseHaarClassifierCascade( &faceCascade );
00119 if(trainPersonNumMat) cvReleaseMat( &trainPersonNumMat );
00120 int i;
00121 if (faceImgArr)
00122 {
00123 for (i=0; i<nTrainFaces; i++)
00124 if (faceImgArr[i]) cvReleaseImage( &faceImgArr[i] );
00125 cvFree( &faceImgArr );
00126 }
00127 if(eigenVectArr)
00128 {
00129 for (i=0; i<nEigens; i++)
00130 if (eigenVectArr[i]) cvReleaseImage( &eigenVectArr[i] );
00131 cvFree( &eigenVectArr );
00132 }
00133 if(trainPersonNumMat) cvReleaseMat( &trainPersonNumMat );
00134 personNames.clear();
00135 if(pAvgTrainImg) cvReleaseImage( &pAvgTrainImg );
00136 if(eigenValMat) cvReleaseMat( &eigenValMat );
00137 if(projectedTrainFaceMat) cvReleaseMat( &projectedTrainFaceMat );
00138 }
00139
00140 };
00141
00142
00143
00144
00145 CvRect FaceRecognitionLib::detectFaceInImage(const IplImage *inputImg, const CvHaarClassifierCascade* cascade )
00146 {
00147 const CvSize minFeatureSize = cvSize(20, 20);
00148 const int flags = CV_HAAR_FIND_BIGGEST_OBJECT | CV_HAAR_DO_ROUGH_SEARCH;
00149 const float search_scale_factor = 1.1f;
00150 IplImage *detectImg;
00151 IplImage *greyImg = 0;
00152 CvMemStorage* storage;
00153 CvRect rc;
00154
00155 CvSeq* rects;
00156
00157
00158 storage = cvCreateMemStorage(0);
00159 cvClearMemStorage( storage );
00160
00161
00162 detectImg = (IplImage*)inputImg;
00163 if (inputImg->nChannels > 1)
00164 {
00165 greyImg = cvCreateImage(cvSize(inputImg->width, inputImg->height), IPL_DEPTH_8U, 1 );
00166 cvCvtColor( inputImg, greyImg, CV_BGR2GRAY );
00167 detectImg = greyImg;
00168 }
00169
00170
00171
00172 rects = cvHaarDetectObjects( detectImg, (CvHaarClassifierCascade*)cascade, storage,
00173 search_scale_factor, 3, flags, minFeatureSize );
00174
00175
00176
00177
00178 if (rects->total > 0)
00179 {
00180 rc = *(CvRect*)cvGetSeqElem( rects, 0 );
00181 }
00182 else
00183 rc = cvRect(-1,-1,-1,-1);
00184
00185
00186
00187 if (greyImg)
00188 cvReleaseImage( &greyImg );
00189 cvReleaseMemStorage( &storage );
00190
00191 return rc;
00192 }
00193
00194
00195
00196
00197
00198 IplImage* FaceRecognitionLib::convertImageToGreyscale(const IplImage *imageSrc)
00199 {
00200 IplImage *imageGrey;
00201
00202
00203 if (imageSrc->nChannels == 3) {
00204 imageGrey = cvCreateImage( cvGetSize(imageSrc), IPL_DEPTH_8U, 1 );
00205 cvCvtColor( imageSrc, imageGrey, CV_BGR2GRAY );
00206 }
00207 else {
00208 imageGrey = cvCloneImage(imageSrc);
00209 }
00210 return imageGrey;
00211 }
00212
00213
00214
00215 IplImage* FaceRecognitionLib::resizeImage(const IplImage *origImg, int newWidth, int newHeight)
00216 {
00217 IplImage *outImg = 0;
00218 int origWidth;
00219 int origHeight;
00220 if (origImg) {
00221 origWidth = origImg->width;
00222 origHeight = origImg->height;
00223 }
00224 if (newWidth <= 0 || newHeight <= 0 || origImg == 0 || origWidth <= 0 || origHeight <= 0) {
00225 ROS_INFO("ERROR in resizeImage: Bad desired image size of %dx%d.", newWidth, newHeight);
00226 exit(1);
00227 }
00228
00229
00230 outImg = cvCreateImage(cvSize(newWidth, newHeight), origImg->depth, origImg->nChannels);
00231 if (newWidth > origImg->width && newHeight > origImg->height) {
00232
00233 cvResetImageROI((IplImage*)origImg);
00234 cvResize(origImg, outImg, CV_INTER_LINEAR);
00235 }
00236 else {
00237
00238 cvResetImageROI((IplImage*)origImg);
00239 cvResize(origImg, outImg, CV_INTER_AREA);
00240 }
00241
00242 return outImg;
00243 }
00244
00245
00246 IplImage* FaceRecognitionLib::cropImage(const IplImage *img, const CvRect region)
00247 {
00248 IplImage *imageTmp;
00249 IplImage *imageRGB;
00250 CvSize size;
00251 size.height = img->height;
00252 size.width = img->width;
00253
00254 if (img->depth != IPL_DEPTH_8U) {
00255 ROS_INFO("ERROR in cropImage: Unknown image depth of %d given in cropImage() instead of 8 bits per pixel.", img->depth);
00256 exit(1);
00257 }
00258
00259
00260 imageTmp = cvCreateImage(size, IPL_DEPTH_8U, img->nChannels);
00261 cvCopy(img, imageTmp, NULL);
00262
00263
00264
00265 cvSetImageROI(imageTmp, region);
00266
00267 size.width = region.width;
00268 size.height = region.height;
00269 imageRGB = cvCreateImage(size, IPL_DEPTH_8U, img->nChannels);
00270 cvCopy(imageTmp, imageRGB, NULL);
00271
00272 cvReleaseImage( &imageTmp );
00273 return imageRGB;
00274 }
00275
00276
00277
00278 IplImage* FaceRecognitionLib::convertFloatImageToUcharImage(const IplImage *srcImg)
00279 {
00280 IplImage *dstImg = 0;
00281 if ((srcImg) && (srcImg->width > 0 && srcImg->height > 0)) {
00282
00283
00284 double minVal, maxVal;
00285 cvMinMaxLoc(srcImg, &minVal, &maxVal);
00286
00287
00288
00289
00290 if (cvIsNaN(minVal) || minVal < -1e30)
00291 minVal = -1e30;
00292 if (cvIsNaN(maxVal) || maxVal > 1e30)
00293 maxVal = 1e30;
00294 if (maxVal-minVal == 0.0f)
00295 maxVal = minVal + 0.001;
00296
00297
00298 dstImg = cvCreateImage(cvSize(srcImg->width, srcImg->height), 8, 1);
00299 cvConvertScale(srcImg, dstImg, 255.0 / (maxVal - minVal), - minVal * 255.0 / (maxVal-minVal));
00300 }
00301 return dstImg;
00302 }
00303
00304
00305
00306
00307 void FaceRecognitionLib::storeEigenfaceImages()
00308 {
00309
00310 ROS_INFO("Saving the image of the average face as 'out_averageImage.bmp'.");
00311 cvSaveImage("out_averageImage.bmp", pAvgTrainImg);
00312
00313
00314 ROS_INFO("Saving the %d eigenvector images as 'out_eigenfaces.bmp'", nEigens);
00315 if (nEigens > 0) {
00316
00317 int COLUMNS = 8;
00318 int nCols = min(nEigens, COLUMNS);
00319 int nRows = 1 + (nEigens / COLUMNS);
00320 int w = eigenVectArr[0]->width;
00321 int h = eigenVectArr[0]->height;
00322 CvSize size;
00323 size = cvSize(nCols * w, nRows * h);
00324 IplImage *bigImg = cvCreateImage(size, IPL_DEPTH_8U, 1);
00325 for (int i=0; i<nEigens; i++) {
00326
00327 IplImage *byteImg = convertFloatImageToUcharImage(eigenVectArr[i]);
00328
00329 int x = w * (i % COLUMNS);
00330 int y = h * (i / COLUMNS);
00331 CvRect ROI = cvRect(x, y, w, h);
00332 cvSetImageROI(bigImg, ROI);
00333 cvCopyImage(byteImg, bigImg);
00334 cvResetImageROI(bigImg);
00335 cvReleaseImage(&byteImg);
00336 }
00337 cvSaveImage("out_eigenfaces.bmp", bigImg);
00338 cvReleaseImage(&bigImg);
00339 }
00340 }
00341
00342
00343 bool FaceRecognitionLib::learn(const char *szFileTrain)
00344 {
00345 int i, offset;
00346
00347
00348 ROS_INFO("Loading the training images in '%s'", szFileTrain);
00349 nTrainFaces = loadFaceImgArray(szFileTrain);
00350 ROS_INFO("Got %d training images.\n", nTrainFaces);
00351 if( nTrainFaces < 2 )
00352 {
00353 fprintf(stderr,
00354 "Need 2 or more training faces"
00355 "Input file contains only %d", nTrainFaces);
00356 return false;
00357 }
00358
00359
00360 doPCA();
00361
00362
00363 projectedTrainFaceMat = cvCreateMat( nTrainFaces, nEigens, CV_32FC1 );
00364 offset = projectedTrainFaceMat->step / sizeof(float);
00365 for(i=0; i<nTrainFaces; i++)
00366 {
00367
00368 cvEigenDecomposite(
00369 faceImgArr[i],
00370 nEigens,
00371 eigenVectArr,
00372 0, 0,
00373 pAvgTrainImg,
00374
00375 projectedTrainFaceMat->data.fl + i*offset);
00376 }
00377
00378
00379 storeTrainingData();
00380
00381
00382
00383
00384
00385 return true;
00386
00387 }
00388
00389
00390
00391 int FaceRecognitionLib::loadTrainingData(CvMat ** pTrainPersonNumMat)
00392 {
00393 CvFileStorage * fileStorage;
00394 int i;
00395
00396
00397 fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_READ );
00398 if( !fileStorage ) {
00399 ROS_INFO("Can't open training database file 'facedata.xml'.");
00400 return 0;
00401 }
00402
00403
00404 personNames.clear();
00405 nPersons = cvReadIntByName( fileStorage, 0, "nPersons", 0 );
00406 if (nPersons == 0) {
00407 ROS_INFO("No people found in the training database 'facedata.xml'.");
00408 return 0;
00409 }
00410
00411 for (i=0; i<nPersons; i++) {
00412 string sPersonName;
00413 char varname[200];
00414 sprintf( varname, "personName_%d", (i+1) );
00415 sPersonName = cvReadStringByName(fileStorage, 0, varname );
00416 personNames.push_back( sPersonName );
00417 }
00418
00419
00420 nEigens = cvReadIntByName(fileStorage, 0, "nEigens", 0);
00421 nTrainFaces = cvReadIntByName(fileStorage, 0, "nTrainFaces", 0);
00422 *pTrainPersonNumMat = (CvMat *)cvReadByName(fileStorage, 0, "trainPersonNumMat", 0);
00423 eigenValMat = (CvMat *)cvReadByName(fileStorage, 0, "eigenValMat", 0);
00424 projectedTrainFaceMat = (CvMat *)cvReadByName(fileStorage, 0, "projectedTrainFaceMat", 0);
00425 pAvgTrainImg = (IplImage *)cvReadByName(fileStorage, 0, "avgTrainImg", 0);
00426 eigenVectArr = (IplImage **)cvAlloc(nTrainFaces*sizeof(IplImage *));
00427 for(i=0; i<nEigens; i++)
00428 {
00429 char varname[200];
00430 sprintf( varname, "eigenVect_%d", i );
00431 eigenVectArr[i] = (IplImage *)cvReadByName(fileStorage, 0, varname, 0);
00432 }
00433
00434
00435 cvReleaseFileStorage( &fileStorage );
00436
00437 ROS_INFO("Training data loaded (%d training images of %d people):", nTrainFaces, nPersons);
00438 ROS_INFO("People: ");
00439 if (nPersons > 0)
00440 ROS_INFO("<%s>", personNames[0].c_str());
00441 for (i=1; i<nPersons; i++) {
00442 ROS_INFO(", <%s>", personNames[i].c_str());
00443 }
00444 database_updated = true;
00445 return 1;
00446 }
00447
00448
00449
00450 void FaceRecognitionLib::storeTrainingData()
00451 {
00452 CvFileStorage * fileStorage;
00453 int i;
00454
00455
00456 fileStorage = cvOpenFileStorage( "facedata.xml", 0, CV_STORAGE_WRITE );
00457
00458
00459 cvWriteInt( fileStorage, "nPersons", nPersons );
00460 for (i=0; i<nPersons; i++) {
00461 char varname[200];
00462 sprintf( varname, "personName_%d", (i+1) );
00463 cvWriteString(fileStorage, varname, personNames[i].c_str(), 0);
00464 }
00465
00466
00467 cvWriteInt( fileStorage, "nEigens", nEigens );
00468 cvWriteInt( fileStorage, "nTrainFaces", nTrainFaces );
00469 cvWrite(fileStorage, "trainPersonNumMat", trainPersonNumMat, cvAttrList(0,0));
00470 cvWrite(fileStorage, "eigenValMat", eigenValMat, cvAttrList(0,0));
00471 cvWrite(fileStorage, "projectedTrainFaceMat", projectedTrainFaceMat, cvAttrList(0,0));
00472 cvWrite(fileStorage, "avgTrainImg", pAvgTrainImg, cvAttrList(0,0));
00473 for(i=0; i<nEigens; i++)
00474 {
00475 char varname[200];
00476 sprintf( varname, "eigenVect_%d", i );
00477 cvWrite(fileStorage, varname, eigenVectArr[i], cvAttrList(0,0));
00478 }
00479
00480
00481 cvReleaseFileStorage( &fileStorage );
00482 }
00483
00484
00485 int FaceRecognitionLib::findNearestNeighbor(float * projectedTestFace, float *pConfidence)
00486 {
00487
00488 double leastDistSq = DBL_MAX;
00489 int i, iTrain, iNearest = 0;
00490
00491 for(iTrain=0; iTrain<nTrainFaces; iTrain++)
00492 {
00493 double distSq=0;
00494
00495 for(i=0; i<nEigens; i++)
00496 {
00497 float d_i = projectedTestFace[i] - projectedTrainFaceMat->data.fl[iTrain*nEigens + i];
00498 #ifdef USE_MAHALANOBIS_DISTANCE
00499 distSq += d_i*d_i / eigenValMat->data.fl[i];
00500 #else
00501 distSq += d_i*d_i;
00502 #endif
00503 }
00504
00505 if(distSq < leastDistSq)
00506 {
00507 leastDistSq = distSq;
00508 iNearest = iTrain;
00509 }
00510 }
00511
00512
00513
00514
00515 *pConfidence = 1.0f - sqrt( leastDistSq / (float)(nTrainFaces * nEigens) ) / 255.0f;
00516
00517
00518 return iNearest;
00519 }
00520
00521
00522
00523 void FaceRecognitionLib::doPCA()
00524 {
00525 int i;
00526 CvTermCriteria calcLimit;
00527 CvSize faceImgSize;
00528
00529
00530 nEigens = nTrainFaces-1;
00531
00532
00533 faceImgSize.width = faceImgArr[0]->width;
00534 faceImgSize.height = faceImgArr[0]->height;
00535 eigenVectArr = (IplImage**)cvAlloc(sizeof(IplImage*) * nEigens);
00536 for(i=0; i<nEigens; i++)
00537 eigenVectArr[i] = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);
00538
00539
00540 eigenValMat = cvCreateMat( 1, nEigens, CV_32FC1 );
00541
00542
00543 pAvgTrainImg = cvCreateImage(faceImgSize, IPL_DEPTH_32F, 1);
00544
00545
00546 calcLimit = cvTermCriteria( CV_TERMCRIT_ITER, nEigens, 1);
00547 ROS_INFO("**** nTrainFaces: %d",nTrainFaces);
00548
00549 cvCalcEigenObjects(
00550 nTrainFaces,
00551 (void*)faceImgArr,
00552 (void*)eigenVectArr,
00553 CV_EIGOBJ_NO_CALLBACK,
00554 0,
00555 0,
00556 &calcLimit,
00557 pAvgTrainImg,
00558 eigenValMat->data.fl);
00559
00560 cvNormalize(eigenValMat, eigenValMat, 1, 0, CV_L1, 0);
00561 }
00562
00563
00564 int FaceRecognitionLib::loadFaceImgArray(const char * filename)
00565 {
00566 FILE * imgListFile = 0;
00567 char imgFilename[512];
00568 int iFace, nFaces=0;
00569 int i;
00570 IplImage *pfaceImg;
00571 IplImage *psizedImg;
00572 IplImage *pequalizedImg;
00573
00574 if( !(imgListFile = fopen(filename, "r")) )
00575 {
00576 fprintf(stderr, "Can\'t open file %s\n", filename);
00577 return 0;
00578 }
00579
00580
00581 while( fgets(imgFilename, 512, imgListFile) ) ++nFaces;
00582 rewind(imgListFile);
00583
00584
00585 faceImgArr = (IplImage **)cvAlloc( nFaces*sizeof(IplImage *) );
00586 trainPersonNumMat = cvCreateMat( 1, nFaces, CV_32SC1 );
00587
00588 personNames.clear();
00589 nPersons = 0;
00590
00591
00592 for(iFace=0; iFace<nFaces; iFace++)
00593 {
00594 char personName[256];
00595 string sPersonName;
00596 int personNumber;
00597
00598 fscanf(imgListFile, "%d %s %s", &personNumber, personName, imgFilename);
00599 sPersonName = personName;
00600
00601
00602
00603 if (personNumber > nPersons) {
00604
00605 for (i=nPersons; i < personNumber; i++) {
00606 personNames.push_back( sPersonName );
00607 }
00608 nPersons = personNumber;
00609
00610 }
00611
00612
00613 trainPersonNumMat->data.i[iFace] = personNumber;
00614
00615
00616 pfaceImg = cvLoadImage(imgFilename, CV_LOAD_IMAGE_GRAYSCALE);
00617 psizedImg = resizeImage(pfaceImg, faceWidth, faceHeight);
00618
00619 pequalizedImg = cvCreateImage(cvGetSize(psizedImg), 8, 1);
00620 cvEqualizeHist(psizedImg, pequalizedImg);
00621 faceImgArr[iFace] = pequalizedImg;
00622 cvReleaseImage( &pfaceImg );cvReleaseImage( &psizedImg );
00623 if( !faceImgArr[iFace] )
00624 {
00625 fprintf(stderr, "Can\'t load image from %s\n", imgFilename);
00626 return 0;
00627 }
00628 }
00629
00630 fclose(imgListFile);
00631
00632 ROS_INFO("Data loaded from '%s': (%d images of %d people).\n", filename, nFaces, nPersons);
00633 ROS_INFO("People: ");
00634 if (nPersons > 0)
00635 ROS_INFO("<%s>", personNames[0].c_str());
00636 for (i=1; i<nPersons; i++) {
00637 ROS_INFO(", <%s>", personNames[i].c_str());
00638 }
00639 ROS_INFO(".\n");
00640
00641 return nFaces;
00642 }
00643
00644
00645
00646
00647
00648 bool FaceRecognitionLib::retrainOnline(void)
00649 {
00650
00651 if(trainPersonNumMat) {cvReleaseMat( &trainPersonNumMat );trainPersonNumMat=0;}
00652 int i;
00653 if (faceImgArr)
00654 {
00655 for (i=0; i<nTrainFaces; i++)
00656 if (faceImgArr[i]) {cvReleaseImage( &faceImgArr[i] );}
00657 cvFree( &faceImgArr );
00658 faceImgArr=0;
00659 }
00660 if(eigenVectArr)
00661 {
00662 for (i=0; i<nEigens; i++)
00663 if (eigenVectArr[i]) {cvReleaseImage( &eigenVectArr[i] );}
00664 cvFree( &eigenVectArr );
00665 eigenVectArr=0;
00666 }
00667
00668 if(trainPersonNumMat) {cvReleaseMat( &trainPersonNumMat ); trainPersonNumMat=0;}
00669 personNames.clear();
00670 nPersons = 0;
00671 nTrainFaces = 0;
00672 nEigens = 0;
00673 if(pAvgTrainImg) {cvReleaseImage( &pAvgTrainImg ); pAvgTrainImg=0;}
00674 if(eigenValMat) {cvReleaseMat( &eigenValMat );eigenValMat=0;}
00675 if(projectedTrainFaceMat) {cvReleaseMat( &projectedTrainFaceMat );projectedTrainFaceMat=0;}
00676
00677 if(!learn("train.txt"))
00678 return(false);
00679 database_updated=true;
00680 return(true);
00681
00682 }