// (Doxygen page-template residue "$search" removed; the header text follows.)
00001 #ifndef _LITERATE_PR2_TEXT_DETECT_ 00002 #define _LITERATE_PR2_TEXT_DETECT_ 00003 00004 #include "opencv2/core/core.hpp" 00005 #include "cv.h" 00006 #include "highgui.h" 00007 00008 using namespace cv; 00009 using namespace std; 00010 00011 class Rotated 00012 { 00013 public: 00014 Rotated(Mat m, Rect c) 00015 { 00016 rotated_img = m; 00017 coords = c; 00018 } 00019 ~Rotated() 00020 { 00021 } 00022 Mat rotated_img; 00023 Rect coords; 00024 }; 00025 00026 class DetectText 00027 { 00028 public: 00029 DetectText(); 00030 ~DetectText(); 00031 00032 std::vector<Rotated> rotated; 00033 00034 /* API */ 00035 void detect(string filename); 00036 void detect(Mat& image); 00037 00038 /* read useful files */ 00039 void readLetterCorrelation(const char* filename); 00040 00041 void readWordList(const char* filename); 00042 00043 /* getters */ 00044 Mat& getDetection(); 00045 00046 vector<string>& getWords(); 00047 00048 vector<Rect>& getBoxesBothSides(); 00049 00050 /* tests */ 00051 00052 void testMergePairs(); 00053 00054 void testEditDistance(); 00055 00056 void testGetCorrelationIndex(); 00057 00058 void testInsertToList(); 00059 00060 private: 00061 /* internal structures */ 00062 enum Mode 00063 { 00064 IMAGE = 1, STREAM = 2 00065 }; 00066 00067 enum FontColor 00068 { 00069 BRIGHT = 1, DARK = 2 00070 }; 00071 00072 enum Purpose 00073 { 00074 UPDATE = 1, REFINE = 2 00075 }; 00076 00077 enum Result 00078 { 00079 COARSE = 1, FINE = 2 00080 }; 00081 00082 struct Pair 00083 { 00084 Pair(int left, int right) : 00085 left(left), right(right) 00086 { 00087 } 00088 int left; 00089 int right; 00090 }; 00091 00092 struct Word 00093 { 00094 Word() : 00095 word(), score(1000) 00096 { 00097 } 00098 Word(string word, float score) : 00099 word(word), score(score) 00100 { 00101 } 00102 string word; 00103 float score; 00104 }; 00105 00106 /* pipeline for detecting black/white words*/ 00107 void detect(); 00108 00109 void preprocess(Mat &image); 00110 00111 void pipeline(int 
blackWhite); 00112 00113 void disposal(); 00114 00115 void strokeWidthTransform(const Mat &image, Mat &swtmap, int searchDirection); 00116 00117 /* for each edge point, search along gradient 00118 * direction compute stroke width 00119 * searchDirection: 1 for along gradient, -1 for opposite 00120 * purpose: 1 for compute, 2 for refine 00121 */ 00122 void updateStrokeWidth(Mat &swtmap, vector<Point> &startPoints, vector<Point> &strokePoints, int searchDirection, 00123 Purpose purpose); 00124 00125 int connectComponentAnalysis(const Mat& swtmap, Mat& ccmap); 00126 00127 void identifyLetters(const Mat& swtmap, const Mat& ccmap); 00128 00129 void groupLetters(const Mat& swtmap, const Mat& ccmap); 00130 00131 void chainPairs(Mat& ccmap); 00132 00133 void findRotationangles(int blackWhite); 00134 00135 void filterBoundingBoxes(vector<Rect>& boundingBoxes, Mat& ccmap, int rejectRatio); 00136 00137 void chainToBox(vector<vector<int> >& chain, vector<Rect>& boundingBox); 00138 00139 void overlapBoundingBoxes(vector<Rect>& boundingBoxes); 00140 00141 void overlayText(vector<Rect>& box, vector<string>& text); 00142 00143 void ocrRead(vector<Rect>& boundingBoxes); 00144 00145 float ocrRead(const Mat& imagePatch, string& output, int actual); 00146 00147 float spellCheck(string& str, string& output, int method); 00148 00149 Mat filterPatch(const Mat& patch); 00150 00151 // helper functions 00152 int ImageAdjust(IplImage* src, IplImage* dst, double low, double high, double bottom, double top, double gamma); 00153 00154 int countInnerLetterCandidates(bool* array); 00155 00156 float getMeanIntensity(const Mat& ccmap, const Rect& rect, int element); 00157 00158 float getMedianStrokeWidth(const Mat& ccmap, const Mat& swtmap, const Rect& rect, int element); 00159 00160 void mergePairs(const vector<Pair>& groups, vector<vector<int> >& chains); 00161 00162 bool mergePairs(const vector<vector<int> >& initialChains, vector<vector<int> >& chains); 00163 00164 void merge(const vector<int>& 
token, vector<int>& chain); 00165 00166 static int editDistance(const string& s, const string& t); 00167 00168 float editDistanceFont(const string& s, const string& t); 00169 00170 int getCorrelationIndex(char letter); 00171 00172 void getNearestWord(const string& str, string& nearestWord); 00173 00174 void getTopkWords(const string& str, const int k, vector<Word>& words); 00175 00176 float insertToList(vector<Word>& words, Word& word); 00177 00178 string& trim(string& str); 00179 00180 static bool spaticalOrder(Rect a, Rect b); 00181 00182 // display intermidate results 00183 void showEdgeMap(); 00184 00185 void showCcmap(Mat& ccmap); 00186 00187 void showSwtmap(Mat& swtmap); 00188 00189 void showLetterDetection(); 00190 00191 void showLetterGroup(); 00192 00193 void showBoundingBoxes(vector<Rect>& boxes); 00194 00195 void showBoundingBoxes(vector<Rect>& boxes, vector<bool>& text); 00196 // tests 00197 void testEdgePoints(vector<Point> &edgepoints); 00198 00199 /***** variables *******/ 00200 00201 struct bgr 00202 { 00203 uchar b; 00204 uchar g; 00205 uchar r; 00206 }; 00207 bool eval; //true=evaluation false=standard 00208 00209 // these variables stays for the same image 00210 Mat originalImage_; 00211 Mat image_; // gray scale to be processed 00212 Mat detection_; 00213 float maxStrokeWidth_; 00214 float initialStrokeWidth_; 00215 Mat edgemap_; 00216 Mat theta_; 00217 bool firstPass_; // white: 1, black : 0 00218 vector<Point> edgepoints_; 00219 00220 Mat correlation_; // read from arg[1] 00221 vector<string> wordList_; // read from arg[2] 00222 Mode mode_; // streaming or images 00223 00224 vector<Rect> boxesBothSides_; 00225 vector<string> wordsBothSides_; 00226 vector<float> boxesScores_; 00227 00228 vector<bool> boxInbox_; 00229 00230 FontColor fontColor_; 00231 Result result_; 00232 // these variables should be cleaned between calculations 00233 vector<Rect> componentsRoi_; 00234 bool *isLetterComponects_; 00235 bool *isGrouped_; 00236 vector<bool*> 
innerComponents_; 00237 00238 vector<Pair> horizontalLetterGroups_; 00239 vector<Pair> verticalLetterGroups_; 00240 vector<vector<int> > horizontalChains_; 00241 vector<vector<int> > verticalChains_; 00242 00243 vector<Rect> boundingBoxes_; 00244 00245 float *componentsMeanIntensity_; 00246 float *componentsMedianStrokeWidth_; 00247 00248 size_t nComponent_; 00249 float maxLetterHeight_; 00250 float minLetterHeight_; 00251 00252 string filename_; 00253 string outputPrefix_; 00254 00255 int textDisplayOffset_; 00256 00257 }; 00258 00259 #endif