Tokenizer.h
Go to the documentation of this file.
00001 /*
00002  * Tokenizer.h
00003  *
00004  *  Created on: Nov 11, 2013
00005  *      Author: dan
00006  */
00007 
00008 #ifndef TOKENIZER_PARSER_H_
00009 #define TOKENIZER_PARSER_H_
00010 
00011 
#include <iostream>
#include <map>
#include <sstream>
#include <string>
#include <vector>
00016 
00017 using namespace std;
00018 
00019 namespace Parser{
00020 
/**
 * Render a single character as a string for diagnostic output.
 *
 * Characters whose value is below 20 are written as their decimal integer
 * code; every other character is written unchanged.
 *
 * NOTE(review): the cut-off is decimal 20, not 0x20 (32), so codes 20..31 are
 * emitted raw - confirm whether 0x20 was intended.
 */
inline std::string str(char c){
    std::stringstream rendered;
    if(c>=20){
        rendered<<c;
    }else{
        rendered<<static_cast<int>(c);
    }
    return rendered.str();
}
00027 
/**
 * A single lexical token.
 *
 * `start` and `end` are positions supplied by whoever fills the token in;
 * the constructor always initialises both to 0.
 */
template<class TokenType>
struct Token{
    TokenType type;     // kind of the token
    std::string text;   // text attached to the token (may be empty)
    size_t start;       // position of the first character
    size_t end;         // position one past the last character

    // Deliberately not `explicit`: a bare TokenType converts to a Token.
    Token(TokenType type=TokenType(0), std::string text="")
        : type(type), text(text), start(0), end(0)
    {}

    // Length of the [start, end) span; 0 when the span is inverted.
    size_t size()const{
        return (end<start) ? 0 : end-start;
    }
};
00036 template<class TokenType>
00037 static ostream& operator<<(ostream& out, const Token<TokenType>& t){
00038     out<<t.type;
00039     if(t.type==1) return out<<"{"<<t.start<<":"<<t.text<<":"<<t.end<<"}";
00040     return out;
00041 }
00042 
/**
 * Snapshot of a TokenStream read position, produced by TokenStream::state()
 * and handed back to TokenStream::state(tstream_state) to rewind.
 */
struct tstream_state{
        size_t i;   // saved read index

        tstream_state(size_t index) : i(index) {}
};
00047 template<class TokenType>
00048 struct TokenStream{
00049         vector< Token<TokenType> > st;
00050         size_t i;
00051         size_t end;
00052         TokenStream():i(0),end((size_t)-1){}
00053         void clear(){ i=0;st.clear(); }
00054         void reset(){ i=0; }
00055         TokenStream<TokenType>& operator<<(Token<TokenType> tkn){st.push_back(tkn);return *this;}
00056         TokenStream<TokenType>& operator>>(Token<TokenType>& tkn){if(eof()) return *this; tkn=st[i++]; return *this;}
00057         TokenStream<TokenType>& operator<<(TokenType tkn){st.push_back(Token<TokenType>(tkn,""));return *this;}
00058         TokenStream<TokenType>& operator>>(TokenType& tkn){if(eof()) return *this; tkn=st[i++].type; return *this;}
00059         TokenStream<TokenType>& operator<<(string tkn){st.push_back(Token<TokenType>(1,tkn));return *this;}
00060         TokenStream<TokenType>& operator>>(string& tkn){if(eof()) return *this; tkn=st[i++].text; return *this;}
00061         const Token<TokenType>& first()const{ return st[i]; }
00062         const Token<TokenType>& last()const{ return st[st.size()-1]; }
00063         size_t count()const{ return st.size()-i; }
00064         bool eof()const{ if(end==(size_t)-1) return i>=st.size(); else return i>=end; }
00065         void setEnd(size_t e = (size_t)-1){ if(e<=st.size() or e==size_t(-1)) end=e; }
00066         size_t getEnd()const{return end;}
00067         tstream_state state()const{ return tstream_state(i); }
00068         void state(tstream_state t){ i=t.i; }
00069 };
00070 
/**
 * Lookup tables consulted by Tokenizer::searchToken.
 */
template<class TokenType>
class TokenizerData{
public:
        // Buffered text -> token type; looked up when a delimiter is reached.
        std::map<std::string,TokenType> string_token;

        // Single character -> token type; looked up with the delimiter itself.
        std::map<char,TokenType> spec_token;
};
00077 template<class TokenType, class Init>
00078 class Tokenizer:public TokenizerData<TokenType>{
00079 public:
00080         Init p;
00081         bool in_string;
00082         bool prev_slash;
00083 
00084         template<class T,class B>
00085         bool contains(const map<T,B>& m, const T& t){ return m.find(t)!=m.end(); }
00086         Tokenizer():in_string(false), prev_slash(false){
00087                 p.init(*this);
00088         }
00089         void searchToken(size_t index, stringstream& buf, size_t& start_index, char c, TokenStream<TokenType>& tkn_stream){
00090             const bool verb = false;
00091             if(verb) cout<<"Proc ["<<str(c)<<"]"<<endl;
00092             Token<TokenType> tkn;
00093             if( (!in_string and p.isDelimiter(c)) or (in_string and c=='\"' and !prev_slash) ){
00094                 if(verb) cout<<"... is delimiter"<<endl;
00095                 if(contains(this->string_token, buf.str())){
00096                     if(verb) cout<<"... ... token found"<<endl;
00097                     tkn = this->string_token[buf.str()];
00098                 }else{
00099                     if(verb) cout<<"... ... token is not found. select as text"<<endl;
00100                     tkn = Token<TokenType>((TokenType)1, buf.str());
00101                 }
00102                 tkn.start = start_index;
00103                 tkn.end = index;
00104                 if(tkn.size()>0){
00105                     if(verb){ cout<<"... ... add token "<<tkn; cout <<std::endl; }
00106                     tkn_stream<<tkn;
00107                 }else{
00108                     if(verb) cout<<"... ... ignore token"<<endl;
00109                 }
00110                 buf.str("");
00111                 start_index = index;
00112                 if(contains(this->spec_token, c)){
00113                     if(verb) cout<<"... ...  is special token"<<endl;
00114                     tkn = this->spec_token[c];
00115                     stringstream tmp; tmp<<c;
00116                     tkn.text = tmp.str();
00117                     tkn.start=index;
00118                     tkn.end = index+1;
00119                     tkn_stream<<tkn;
00120                 }
00121             if( c == '\"' ){
00122                 in_string = !in_string;
00123             }
00124 
00125                 start_index+=1;
00126             }else{
00127                 if(verb) cout<<"... is not delimiter"<<endl;
00128                 buf<<c;
00129             }
00130             if(verb) cout<<"... current buffer is ["<<buf.str()<<"]"<<endl;
00131         prev_slash = c == '\\';
00132         }
00133 };
00134 
/*
 * ADD_ERROR(tkn, X)
 *
 * Appends a two-line diagnostic to `errors`:
 *     In <filename>:<line>:<pos> <tkn>
 *         <X>
 * The line/position pair is obtained from searchLineInfo(tkn.end, l, p).
 * `errors`, `filename` and `searchLineInfo` are taken from the scope in which
 * the macro is expanded. X may be any chain of `<<`-insertable operands.
 */
#define ADD_ERROR( tkn, X ) \
    { \
        int l; \
        int p; \
        searchLineInfo(tkn.end, l, p); \
        errors<<"In "<<filename<<":"<<l<<":"<<p<<" "<<tkn<<std::endl \
              <<"    "<<X<<std::endl; \
    }
00138 
/*
 * TKN_SEARCH_CLOSE_PARENT(END, stream, topen, tclose, X)
 *
 * Expands to a call of tkn_search_close_parent(stream, topen, tclose) followed
 * by a check of END, so it is written as the right-hand side of an
 * initialisation or assignment of END:
 *
 *     size_t e = TKN_SEARCH_CLOSE_PARENT(e, stream, OPEN, CLOSE, "block")
 *
 * When END equals size_t(-1) the macro calls PRINT, appends a diagnostic for
 * the in-scope `tkn` to `errors`, and makes the enclosing function
 * `return false`. X must be a string literal (it is pasted between literals).
 * `tkn`, `errors`, `filename`, `searchLineInfo` and `PRINT` come from the
 * expansion scope.
 */
#define TKN_SEARCH_CLOSE_PARENT(END, stream, topen, tclose, X) \
    tkn_search_close_parent(stream, topen, tclose); \
    if(END==static_cast<size_t>(-1)){ \
        PRINT("Unexpected end of file during " X "."); \
        int l; \
        int p; \
        searchLineInfo(tkn.end, l, p); \
        errors<<"In "<<filename<<":"<<l<<":"<<p<<" "<<tkn<<std::endl \
              <<"    Unexpected end of structure during " X " parsing."<<std::endl; \
        return false; \
    }
00146 
/*
 * Small parsing helpers. All of them rely on variables named `stream` and
 * `tkn` being visible where the macro is expanded.
 *
 *   *_OPTIONAL(cond-or-type, RET)  run statement RET when the check fails
 *   plain forms                    `return false` when the check fails
 *
 *   TKN_SEARCH*       test an arbitrary condition
 *   TKN_NEXT_SEARCH*  read the next token into `tkn`, then test a condition
 *   TKN_NEXT*         read the next token into `tkn`, then require tkn.type==TKN
 */
#define TKN_SEARCH_OPTIONAL(TKN, RET) if(!(TKN)) RET;
#define TKN_NEXT_SEARCH_OPTIONAL(TKN, RET) stream >> tkn; if(!(TKN)) RET;
#define TKN_NEXT_OPTIONAL(TKN, RET) TKN_NEXT_SEARCH_OPTIONAL(tkn.type==TKN, RET)

#define TKN_SEARCH(TKN) if(!(TKN)) return false;
#define TKN_NEXT_SEARCH(TKN) stream >> tkn; if(!(TKN)) return false;
#define TKN_NEXT(TKN) TKN_NEXT_SEARCH(tkn.type==TKN)
00154 
00155 
00156 template<class TokenType>
00157 size_t tkn_search_close_parent(TokenStream<TokenType>& stream, TokenType topen, TokenType tclose){
00158     tstream_state state = stream.state();
00159     int c = 0;
00160     Token<TokenType> tkn;
00161     while(c>=0 and not stream.eof()){
00162         stream >> tkn;
00163         if(tkn.type == topen) c++;
00164         if(tkn.type == tclose)c--;
00165     }
00166     size_t res = size_t(-1);
00167     if(c<0) res = stream.i;
00168     stream.state(state);
00169     return res;
00170 }
00171 
00172 }
00173 
00174 #endif /* TOKENIZER_PARSER_H_ */


decision_making_parser
Author(s):
autogenerated on Wed Aug 26 2015 11:16:57