00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00019
00020 #ifndef TESSERACT_API_BASEAPI_H__
00021 #define TESSERACT_API_BASEAPI_H__
00022
00023 #include <stdio.h>
00024
00025
00026
00027 #include "platform.h"
00028 #include "apitypes.h"
00029 #include "thresholder.h"
00030 #include "unichar.h"
00031 #include "tesscallback.h"
00032 #include "publictypes.h"
00033 #include "pageiterator.h"
00034 #include "resultiterator.h"
00035
// Forward declarations of types that this header mentions only through
// pointers, references or template arguments, so that their defining headers
// do not have to be pulled in here.
template <typename T> class GenericVector;
class PAGE_RES;
class PAGE_RES_IT;
class ParagraphModel;
class BlamerBundle;
class BLOCK_LIST;
class DENORM;
class IMAGE;
class MATRIX;
class PBLOB;
class ROW;
class STRING;
class WERD;
struct Pix;
struct Box;
struct Pixa;
struct Boxa;
class ETEXT_DESC;
struct OSResults;
class TBOX;
class UNICHARSET;

// Opaque list node. LIST is the pointer alias that appears in the
// FillLatticeFunc signature further down.
struct list_rec;
typedef list_rec *LIST;

// Number of elements in an INT_FEATURE_ARRAY.
#define MAX_NUM_INT_FEATURES 512
struct INT_FEATURE_STRUCT;
typedef INT_FEATURE_STRUCT *INT_FEATURE;
// Fixed-size array of features. INT_FEATURE_STRUCT is still incomplete at
// this point, so the array type can be named (e.g. as a parameter type) but
// not instantiated until the full definition is visible.
typedef INT_FEATURE_STRUCT INT_FEATURE_ARRAY[MAX_NUM_INT_FEATURES];
struct TBLOB;
00068
00069 namespace tesseract {
00070
00071 class CubeRecoContext;
00072 class Dawg;
00073 class Dict;
00074 class EquationDetect;
00075 class LTRResultIterator;
00076 class MutableIterator;
00077 class Tesseract;
00078 class Trie;
00079 class Wordrec;
00080
00081 typedef int (Dict::*DictFunc)(void* void_dawg_args,
00082 UNICHAR_ID unichar_id, bool word_end) const;
00083 typedef double (Dict::*ProbabilityInContextFunc)(const char* lang,
00084 const char* context,
00085 int context_bytes,
00086 const char* character,
00087 int character_bytes);
00088 typedef void (Wordrec::*FillLatticeFunc)(const MATRIX &ratings,
00089 const LIST &best_choices,
00090 const UNICHARSET &unicharset,
00091 BlamerBundle *blamer_bundle);
00092 typedef TessCallback3<const UNICHARSET &, int, PAGE_RES *> TruthCallback;
00093
00102 class TESS_API TessBaseAPI {
00103 public:
00104 TessBaseAPI();
00105 virtual ~TessBaseAPI();
00106
00110 static const char* Version();
00111
00116 void SetInputName(const char* name);
00117
00119 void SetOutputName(const char* name);
00120
00136 bool SetVariable(const char* name, const char* value);
00137 bool SetDebugVariable(const char* name, const char* value);
00138
00143 bool GetIntVariable(const char *name, int *value) const;
00144 bool GetBoolVariable(const char *name, bool *value) const;
00145 bool GetDoubleVariable(const char *name, double *value) const;
00146
00151 const char *GetStringVariable(const char *name) const;
00152
00156 void PrintVariables(FILE *fp) const;
00157
00161 bool GetVariableAsString(const char *name, STRING *val);
00162
00201 int Init(const char* datapath, const char* language, OcrEngineMode mode,
00202 char **configs, int configs_size,
00203 const GenericVector<STRING> *vars_vec,
00204 const GenericVector<STRING> *vars_values,
00205 bool set_only_non_debug_params);
00206 int Init(const char* datapath, const char* language, OcrEngineMode oem) {
00207 return Init(datapath, language, oem, NULL, 0, NULL, NULL, false);
00208 }
00209 int Init(const char* datapath, const char* language) {
00210 return Init(datapath, language, OEM_DEFAULT, NULL, 0, NULL, NULL, false);
00211 }
00212
00221 const char* GetInitLanguagesAsString() const;
00222
00228 void GetLoadedLanguagesAsVector(GenericVector<STRING>* langs) const;
00229
00236 int InitLangMod(const char* datapath, const char* language);
00237
00242 void InitForAnalysePage();
00243
00250 void ReadConfigFile(const char* filename);
00252 void ReadDebugConfigFile(const char* filename);
00253
00259 void SetPageSegMode(PageSegMode mode);
00260
00262 PageSegMode GetPageSegMode() const;
00263
00281 char* TesseractRect(const unsigned char* imagedata,
00282 int bytes_per_pixel, int bytes_per_line,
00283 int left, int top, int width, int height);
00284
00289 void ClearAdaptiveClassifier();
00290
00297
00298
00308 void SetImage(const unsigned char* imagedata, int width, int height,
00309 int bytes_per_pixel, int bytes_per_line);
00310
00321 void SetImage(const Pix* pix);
00322
00327 void SetSourceResolution(int ppi);
00328
00334 void SetRectangle(int left, int top, int width, int height);
00335
00343 void SetThresholder(ImageThresholder* thresholder) {
00344 if (thresholder_ != NULL)
00345 delete thresholder_;
00346 thresholder_ = thresholder;
00347 ClearResults();
00348 }
00349
00355 Pix* GetThresholdedImage();
00356
00362 Boxa* GetRegions(Pixa** pixa);
00363
00371 Boxa* GetTextlines(Pixa** pixa, int** blockids);
00372
00381 Boxa* GetStrips(Pixa** pixa, int** blockids);
00382
00388 Boxa* GetWords(Pixa** pixa);
00389
00398 Boxa* GetConnectedComponents(Pixa** cc);
00399
00408 Boxa* GetComponentImages(PageIteratorLevel level,
00409 bool text_only,
00410 Pixa** pixa, int** blockids);
00411
00418 int GetThresholdedImageScaleFactor() const;
00419
00425 void DumpPGM(const char* filename);
00426
00438 PageIterator* AnalyseLayout();
00439
00446 int Recognize(ETEXT_DESC* monitor);
00447
00454 int RecognizeForChopTest(ETEXT_DESC* monitor);
00455
00472 bool ProcessPages(const char* filename,
00473 const char* retry_config, int timeout_millisec,
00474 STRING* text_out);
00475
00487 bool ProcessPage(Pix* pix, int page_index, const char* filename,
00488 const char* retry_config, int timeout_millisec,
00489 STRING* text_out);
00490
00499 ResultIterator* GetIterator();
00500
00509 MutableIterator* GetMutableIterator();
00510
00515 char* GetUTF8Text();
00516
00522 char* GetHOCRText(int page_number);
00530 char* GetBoxText(int page_number);
00536 char* GetUNLVText();
00538 int MeanTextConf();
00545 int* AllWordConfidences();
00546
00557 bool AdaptToWordStr(PageSegMode mode, const char* wordstr);
00558
00565 void Clear();
00566
00573 void End();
00574
00581 int IsValidWord(const char *word);
00582
00583 bool GetTextDirection(int* out_offset, float* out_slope);
00584
00586 void SetDictFunc(DictFunc f);
00587
00591 void SetProbabilityInContextFunc(ProbabilityInContextFunc f);
00592
00594 void SetFillLatticeFunc(FillLatticeFunc f);
00595
00600 bool DetectOS(OSResults*);
00601
00603 void GetFeaturesForBlob(TBLOB* blob, const DENORM& denorm,
00604 INT_FEATURE_ARRAY int_features,
00605 int* num_features, int* FeatureOutlineIndex);
00606
00611 static ROW* FindRowForBox(BLOCK_LIST* blocks, int left, int top,
00612 int right, int bottom);
00613
00618 void RunAdaptiveClassifier(TBLOB* blob, const DENORM& denorm,
00619 int num_max_matches,
00620 int* unichar_ids,
00621 float* ratings,
00622 int* num_matches_returned);
00623
00625 const char* GetUnichar(int unichar_id);
00626
00628 const Dawg *GetDawg(int i) const;
00629
00631 int NumDawgs() const;
00632
00634 static ROW *MakeTessOCRRow(float baseline, float xheight,
00635 float descender, float ascender);
00636
00638 static TBLOB *MakeTBLOB(Pix *pix);
00639
00645 static void NormalizeTBLOB(TBLOB *tblob, ROW *row,
00646 bool numeric_mode, DENORM *denorm);
00647
00648 Tesseract* const tesseract() const {
00649 return tesseract_;
00650 }
00651
00652 OcrEngineMode const oem() const {
00653 return last_oem_requested_;
00654 }
00655
00656 void InitTruthCallback(TruthCallback *cb) { truth_cb_ = cb; }
00657
00659 CubeRecoContext *GetCubeRecoContext() const;
00660
00661 void set_min_orientation_margin(double margin);
00662
00667 void GetBlockTextOrientations(int** block_orientation,
00668 bool** vertical_writing);
00669
00671 BLOCK_LIST* FindLinesCreateBlockList();
00672
00678 static void DeleteBlockList(BLOCK_LIST* block_list);
00679
00680
00681 protected:
00682
00684 TESS_LOCAL bool InternalSetImage();
00685
00690 TESS_LOCAL virtual void Threshold(Pix** pix);
00691
00696 TESS_LOCAL int FindLines();
00697
00699 TESS_LOCAL void ClearResults();
00700
00706 TESS_LOCAL LTRResultIterator* GetLTRIterator();
00707
00714 TESS_LOCAL int TextLength(int* blob_count);
00715
00717
00718
00723 TESS_LOCAL void AdaptToCharacter(const char *unichar_repr,
00724 int length,
00725 float baseline,
00726 float xheight,
00727 float descender,
00728 float ascender);
00729
00731 TESS_LOCAL PAGE_RES* RecognitionPass1(BLOCK_LIST* block_list);
00732 TESS_LOCAL PAGE_RES* RecognitionPass2(BLOCK_LIST* block_list, PAGE_RES* pass1_result);
00733
00735
00736 TESS_LOCAL void DetectParagraphs(int debug_level);
00737
00742 TESS_LOCAL static int TesseractExtractResult(char** text,
00743 int** lengths,
00744 float** costs,
00745 int** x0,
00746 int** y0,
00747 int** x1,
00748 int** y1,
00749 PAGE_RES* page_res);
00750
00751 TESS_LOCAL const PAGE_RES* GetPageRes() const {
00752 return page_res_;
00753 };
00754
00755
00756 protected:
00757 Tesseract* tesseract_;
00758 Tesseract* osd_tesseract_;
00759 EquationDetect* equ_detect_;
00760 ImageThresholder* thresholder_;
00761 GenericVector<ParagraphModel *>* paragraph_models_;
00762 BLOCK_LIST* block_list_;
00763 PAGE_RES* page_res_;
00764 STRING* input_file_;
00765 STRING* output_file_;
00766 STRING* datapath_;
00767 STRING* language_;
00768 OcrEngineMode last_oem_requested_;
00769 bool recognition_done_;
00770 TruthCallback *truth_cb_;
00771
00776
00777 int rect_left_;
00778 int rect_top_;
00779 int rect_width_;
00780 int rect_height_;
00781 int image_width_;
00782 int image_height_;
00783
00784
00785 };
00786
00787 }
00788
00789 #endif // TESSERACT_API_BASEAPI_H__