text_locator: TRecognizer.cpp Source File

Go to the documentation of this file.
00001 /*********************************************************************
00002         The MIT License (MIT)
00003 
00004         Copyright (c) <2013> <Vojtech Novak>
00005 
00006         Permission is hereby granted, free of charge, to any person obtaining a copy
00007         of this software and associated documentation files (the "Software"), to deal
00008         in the Software without restriction, including without limitation the rights
00009         to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
00010         copies of the Software, and to permit persons to whom the Software is
00011         furnished to do so, subject to the following conditions:
00012 
00013         The above copyright notice and this permission notice shall be included in
00014         all copies or substantial portions of the Software.
00015 
00016         THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
00017         IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
00018         FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
00019         AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
00020         LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
00021         OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
00022         THE SOFTWARE.
00023 *********************************************************************/
00024 
00025 #include "text_locator/TRecognizer.hpp"
00026 #include <opencv/highgui.h>
00027 #include <sstream>
00028 #include <cstdlib>
00029 #include <text_locator/utils.hpp>
00030 
00031 namespace ros_text_locator {
00032 
00033 TRecognizer::TRecognizer(tesseract::PageSegMode pageSegMode, std::string lang =
00034                 "eng") :
00035                 mode(pageSegMode), lang(lang), api(new tesseract::TessBaseAPI()) {
00036 }
00037 
00038 bool TRecognizer::initTesseract() {
00039         if (api->Init("/usr/share/tesseract-ocr/", lang.c_str())) {
00040                 fprintf(stderr, "Could not initialize tesseract.\n");
00041                 std::exit(EXIT_FAILURE);
00042                 return false;
00043         }
00044         return true;
00045 }
00046 
00047 TRecognizer::~TRecognizer() {
00048         api->Clear();
00049         api->End();
00050 }
00051 
00052 std::string TRecognizer::recognize(const cv::Mat &img, int x, int y, int width, int height) {
00053         initTesseract();
00054         std::string out = tesseract(img, x, y, width, height);
00055         return trim(out);
00056 }
00057 
00058 void TRecognizer::recognize(const cv::Mat &img, std::vector<Text2D>& text2d) {
00059         std::string in;
00060         initTesseract();
00061         for (size_t i = 0; i < text2d.size(); i++) {
00062                 Text2D& r = text2d.at(i);
00063                 in = tesseract(img, r.x, r.y, (r.x2 - r.x), (r.y2 - r.y));
00064                 r.text = trim(in);
00065         }
00066 }
00067 
00068 const char* TRecognizer::tesseract(const cv::Mat &image, int left, int top,
00069                 int width, int height) {
00070         api->SetImage(image.data, image.cols, image.rows, image.channels(),
00071                         image.step1());
00072         api->SetPageSegMode(mode);
00073         api->SetRectangle(left, top, width, height);
00074         char* outText = api->GetUTF8Text();
00075         //std::cout << (outText) << std::endl;
00076         return outText;
00077 }
00078 
00079 } /* namespace ros_text_locator */