$search
00001 /* 00002 Aseba - an event-based framework for distributed robot control 00003 Copyright (C) 2007--2012: 00004 Stephane Magnenat <stephane at magnenat dot net> 00005 (http://stephane.magnenat.net) 00006 and other contributors, see authors.txt for details 00007 00008 This program is free software: you can redistribute it and/or modify 00009 it under the terms of the GNU Lesser General Public License as published 00010 by the Free Software Foundation, version 3 of the License. 00011 00012 This program is distributed in the hope that it will be useful, 00013 but WITHOUT ANY WARRANTY; without even the implied warranty of 00014 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00015 GNU Lesser General Public License for more details. 00016 00017 You should have received a copy of the GNU Lesser General Public License 00018 along with this program. If not, see <http://www.gnu.org/licenses/>. 00019 */ 00020 00021 #include "compiler.h" 00022 #include "../utils/FormatableString.h" 00023 #include <cstdlib> 00024 #include <sstream> 00025 #include <ostream> 00026 #include <cctype> 00027 #include <cstdio> 00028 00029 namespace Aseba 00030 { 00032 Compiler::Token::Token(Type type, SourcePos pos, const std::wstring& value) : 00033 type(type), 00034 sValue(value), 00035 pos(pos) 00036 { 00037 if (type == TOKEN_INT_LITERAL) 00038 { 00039 bool wasSigned = false; 00040 if ((value.length() > 1) && (value[1] == 'x')) 00041 iValue = wcstol(value.c_str() + 2, NULL, 16); 00042 else if ((value.length() > 1) && (value[1] == 'b')) 00043 iValue = wcstol(value.c_str() + 2, NULL, 2); 00044 else 00045 { 00046 iValue = wcstol(value.c_str(), NULL, 10); 00047 wasSigned = true; 00048 } 00049 if ((wasSigned == false) && (iValue > 32767)) 00050 iValue -= 65536; 00051 } 00052 else 00053 iValue = 0; 00054 pos.column--; // column has already been incremented when token is created, so we remove one 00055 pos.character--; // character has already been incremented when token is created, so we remove one 00056 } 00057 00059 const std::wstring Compiler::Token::typeName() const 00060 { 00061 switch (type) 00062 { 00063 case TOKEN_END_OF_STREAM: return translate(ERROR_TOKEN_END_OF_STREAM); 00064 case TOKEN_STR_when: return translate(ERROR_TOKEN_STR_when); 00065 case TOKEN_STR_emit: return translate(ERROR_TOKEN_STR_emit); 00066 case TOKEN_STR_for: return translate(ERROR_TOKEN_STR_for); 00067 case TOKEN_STR_in: return translate(ERROR_TOKEN_STR_in); 00068 case TOKEN_STR_step: return translate(ERROR_TOKEN_STR_step); 00069 case TOKEN_STR_while: return translate(ERROR_TOKEN_STR_while); 00070 case TOKEN_STR_do: return translate(ERROR_TOKEN_STR_do); 00071 case TOKEN_STR_if: return translate(ERROR_TOKEN_STR_if); 00072 case TOKEN_STR_then: return translate(ERROR_TOKEN_STR_then); 00073 case TOKEN_STR_else: return translate(ERROR_TOKEN_STR_else); 00074 case TOKEN_STR_elseif: return translate(ERROR_TOKEN_STR_elseif); 00075 case TOKEN_STR_end: return translate(ERROR_TOKEN_STR_end); 00076 case TOKEN_STR_var: return translate(ERROR_TOKEN_STR_var); 00077 case TOKEN_STR_call: return translate(ERROR_TOKEN_STR_call); 00078 case TOKEN_STR_sub: return translate(ERROR_TOKEN_STR_sub); 00079 case TOKEN_STR_callsub: return translate(ERROR_TOKEN_STR_callsub); 00080 case TOKEN_STR_onevent: return translate(ERROR_TOKEN_STR_onevent); 00081 case TOKEN_STR_abs: return translate(ERROR_TOKEN_STR_abs); 00082 case TOKEN_STR_return: return translate(ERROR_TOKEN_STR_return); 00083 case TOKEN_STRING_LITERAL: return translate(ERROR_TOKEN_STRING_LITERAL); 00084 case TOKEN_INT_LITERAL: return translate(ERROR_TOKEN_INT_LITERAL); 00085 case TOKEN_PAR_OPEN: return translate(ERROR_TOKEN_PAR_OPEN); 00086 case TOKEN_PAR_CLOSE: return translate(ERROR_TOKEN_PAR_CLOSE); 00087 case TOKEN_BRACKET_OPEN: return translate(ERROR_TOKEN_BRACKET_OPEN); 00088 case TOKEN_BRACKET_CLOSE: return translate(ERROR_TOKEN_BRACKET_CLOSE); 00089 case TOKEN_COLON: return translate(ERROR_TOKEN_COLON); 00090 case TOKEN_COMMA: return translate(ERROR_TOKEN_COMMA); 00091 case TOKEN_ASSIGN: return translate(ERROR_TOKEN_ASSIGN); 00092 case TOKEN_OP_OR: return translate(ERROR_TOKEN_OP_OR); 00093 case TOKEN_OP_AND: return translate(ERROR_TOKEN_OP_AND); 00094 case TOKEN_OP_NOT: return translate(ERROR_TOKEN_OP_NOT); 00095 case TOKEN_OP_BIT_OR: return translate(ERROR_TOKEN_OP_BIT_OR); 00096 case TOKEN_OP_BIT_XOR: return translate(ERROR_TOKEN_OP_BIT_XOR); 00097 case TOKEN_OP_BIT_AND: return translate(ERROR_TOKEN_OP_BIT_AND); 00098 case TOKEN_OP_BIT_NOT: return translate(ERROR_TOKEN_OP_BIT_NOT); 00099 case TOKEN_OP_BIT_OR_EQUAL: return translate(ERROR_TOKEN_OP_BIT_OR_EQUAL); 00100 case TOKEN_OP_BIT_XOR_EQUAL: return translate(ERROR_TOKEN_OP_BIT_XOR_EQUAL); 00101 case TOKEN_OP_BIT_AND_EQUAL: return translate(ERROR_TOKEN_OP_BIT_AND_EQUAL); 00102 case TOKEN_OP_EQUAL: return translate(ERROR_TOKEN_OP_EQUAL); 00103 case TOKEN_OP_NOT_EQUAL: return translate(ERROR_TOKEN_OP_NOT_EQUAL); 00104 case TOKEN_OP_BIGGER: return translate(ERROR_TOKEN_OP_BIGGER); 00105 case TOKEN_OP_BIGGER_EQUAL: return translate(ERROR_TOKEN_OP_BIGGER_EQUAL); 00106 case TOKEN_OP_SMALLER: return translate(ERROR_TOKEN_OP_SMALLER); 00107 case TOKEN_OP_SMALLER_EQUAL: return translate(ERROR_TOKEN_OP_SMALLER_EQUAL); 00108 case TOKEN_OP_SHIFT_LEFT: return translate(ERROR_TOKEN_OP_SHIFT_LEFT); 00109 case TOKEN_OP_SHIFT_RIGHT: return translate(ERROR_TOKEN_OP_SHIFT_RIGHT); 00110 case TOKEN_OP_SHIFT_LEFT_EQUAL: return translate(ERROR_TOKEN_OP_SHIFT_LEFT_EQUAL); 00111 case TOKEN_OP_SHIFT_RIGHT_EQUAL: return translate(ERROR_TOKEN_OP_SHIFT_RIGHT_EQUAL); 00112 case TOKEN_OP_ADD: return translate(ERROR_TOKEN_OP_ADD); 00113 case TOKEN_OP_NEG: return translate(ERROR_TOKEN_OP_NEG); 00114 case TOKEN_OP_ADD_EQUAL: return translate(ERROR_TOKEN_OP_ADD_EQUAL); 00115 case TOKEN_OP_NEG_EQUAL: return translate(ERROR_TOKEN_OP_NEG_EQUAL); 00116 case TOKEN_OP_PLUS_PLUS: return translate(ERROR_TOKEN_OP_PLUS_PLUS); 00117 case TOKEN_OP_MINUS_MINUS: return translate(ERROR_TOKEN_OP_MINUS_MINUS); 00118 case TOKEN_OP_MULT: return translate(ERROR_TOKEN_OP_MULT); 00119 case TOKEN_OP_DIV: return translate(ERROR_TOKEN_OP_DIV); 00120 case TOKEN_OP_MOD: return translate(ERROR_TOKEN_OP_MOD); 00121 case TOKEN_OP_MULT_EQUAL: return translate(ERROR_TOKEN_OP_MULT_EQUAL); 00122 case TOKEN_OP_DIV_EQUAL: return translate(ERROR_TOKEN_OP_DIV_EQUAL); 00123 case TOKEN_OP_MOD_EQUAL: return translate(ERROR_TOKEN_OP_MOD_EQUAL); 00124 default: return translate(ERROR_TOKEN_UNKNOWN); 00125 } 00126 } 00127 00129 std::wstring Compiler::Token::toWString() const 00130 { 00131 std::wostringstream oss; 00132 oss << translate(ERROR_LINE) << pos.row + 1 << translate(ERROR_COL) << pos.column + 1 << L" : "; 00133 oss << typeName(); 00134 if (type == TOKEN_INT_LITERAL) 00135 oss << L" : " << iValue; 00136 if (type == TOKEN_STRING_LITERAL) 00137 oss << L" : " << sValue; 00138 return oss.str(); 00139 } 00140 00141 00144 void Compiler::tokenize(std::wistream& source) 00145 { 00146 tokens.clear(); 00147 SourcePos pos(0, 0, 0); 00148 const unsigned tabSize = 4; 00149 00150 // tokenize text source 00151 while (source.good()) 00152 { 00153 wchar_t c = source.get(); 00154 00155 if (source.eof()) 00156 break; 00157 00158 pos.column++; 00159 pos.character++; 00160 00161 switch (c) 00162 { 00163 // simple cases of one character 00164 case ' ': break; 00165 //case '\t': pos.column += tabSize - 1; break; 00166 case '\t': break; 00167 case '\n': pos.row++; pos.column = -1; break; // -1 so next call to pos.column++ result set 0 00168 case '\r': pos.column = -1; break; // -1 so next call to pos.column++ result set 0 00169 case '(': tokens.push_back(Token(Token::TOKEN_PAR_OPEN, pos)); break; 00170 case ')': tokens.push_back(Token(Token::TOKEN_PAR_CLOSE, pos)); break; 00171 case '[': tokens.push_back(Token(Token::TOKEN_BRACKET_OPEN, pos)); break; 00172 case ']': tokens.push_back(Token(Token::TOKEN_BRACKET_CLOSE, pos)); break; 00173 case ':': tokens.push_back(Token(Token::TOKEN_COLON, pos)); break; 00174 case ',': tokens.push_back(Token(Token::TOKEN_COMMA, pos)); break; 00175 00176 // special case for comment 00177 case '#': 00178 { 00179 // check if it's a comment block #* ... *# 00180 if (source.peek() == '*') 00181 { 00182 // comment block 00183 // record position of the begining 00184 SourcePos begin(pos); 00185 // move forward by 2 characters then search for the end 00186 int step = 2; 00187 while ((step > 0) || (c != '*') || (source.peek() != '#')) 00188 { 00189 if (step) 00190 step--; 00191 00192 if (c == '\t') 00193 pos.column += tabSize; 00194 else if (c == '\n') 00195 { 00196 pos.row++; 00197 pos.column = 0; 00198 } 00199 else 00200 pos.column++; 00201 c = source.get(); 00202 pos.character++; 00203 if (source.eof()) 00204 { 00205 // EOF -> unbalanced block 00206 throw TranslatableError(begin, ERROR_UNBALANCED_COMMENT_BLOCK); 00207 } 00208 } 00209 // fetch the # 00210 getNextCharacter(source, pos); 00211 } 00212 else 00213 { 00214 // simple comment 00215 while ((c != '\n') && (c != '\r') && (!source.eof())) 00216 { 00217 if (c == '\t') 00218 pos.column += tabSize; 00219 else 00220 pos.column++; 00221 c = source.get(); 00222 pos.character++; 00223 } 00224 if (c == '\n') 00225 { 00226 pos.row++; 00227 pos.column = 0; 00228 } 00229 else if (c == '\r') 00230 pos.column = 0; 00231 } 00232 } 00233 break; 00234 00235 // cases that require one character look-ahead 00236 case '+': 00237 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_ADD_EQUAL)) 00238 break; 00239 if (testNextCharacter(source, pos, '+', Token::TOKEN_OP_PLUS_PLUS)) 00240 break; 00241 tokens.push_back(Token(Token::TOKEN_OP_ADD, pos)); 00242 break; 00243 00244 case '-': 00245 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NEG_EQUAL)) 00246 break; 00247 if (testNextCharacter(source, pos, '-', Token::TOKEN_OP_MINUS_MINUS)) 00248 break; 00249 tokens.push_back(Token(Token::TOKEN_OP_NEG, pos)); 00250 break; 00251 00252 case '*': 00253 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MULT_EQUAL)) 00254 break; 00255 tokens.push_back(Token(Token::TOKEN_OP_MULT, pos)); 00256 break; 00257 00258 case '/': 00259 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_DIV_EQUAL)) 00260 break; 00261 tokens.push_back(Token(Token::TOKEN_OP_DIV, pos)); 00262 break; 00263 00264 case '%': 00265 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MOD_EQUAL)) 00266 break; 00267 tokens.push_back(Token(Token::TOKEN_OP_MOD, pos)); 00268 break; 00269 00270 case '|': 00271 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_OR_EQUAL)) 00272 break; 00273 tokens.push_back(Token(Token::TOKEN_OP_BIT_OR, pos)); 00274 break; 00275 00276 case '^': 00277 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_XOR_EQUAL)) 00278 break; 00279 tokens.push_back(Token(Token::TOKEN_OP_BIT_XOR, pos)); 00280 break; 00281 00282 case '&': 00283 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_AND_EQUAL)) 00284 break; 00285 tokens.push_back(Token(Token::TOKEN_OP_BIT_AND, pos)); 00286 break; 00287 00288 case '~': 00289 tokens.push_back(Token(Token::TOKEN_OP_BIT_NOT, pos)); 00290 break; 00291 00292 case '!': 00293 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NOT_EQUAL)) 00294 break; 00295 throw TranslatableError(pos, ERROR_SYNTAX); 00296 break; 00297 00298 case '=': 00299 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_EQUAL)) 00300 break; 00301 tokens.push_back(Token(Token::TOKEN_ASSIGN, pos)); 00302 break; 00303 00304 // cases that require two characters look-ahead 00305 case '<': 00306 if (source.peek() == '<') 00307 { 00308 // << 00309 getNextCharacter(source, pos); 00310 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_LEFT_EQUAL)) 00311 break; 00312 tokens.push_back(Token(Token::TOKEN_OP_SHIFT_LEFT, pos)); 00313 break; 00314 } 00315 // < 00316 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SMALLER_EQUAL)) 00317 break; 00318 tokens.push_back(Token(Token::TOKEN_OP_SMALLER, pos)); 00319 break; 00320 00321 case '>': 00322 if (source.peek() == '>') 00323 { 00324 // >> 00325 getNextCharacter(source, pos); 00326 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_RIGHT_EQUAL)) 00327 break; 00328 tokens.push_back(Token(Token::TOKEN_OP_SHIFT_RIGHT, pos)); 00329 break; 00330 } 00331 // > 00332 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIGGER_EQUAL)) 00333 break; 00334 tokens.push_back(Token(Token::TOKEN_OP_BIGGER, pos)); 00335 break; 00336 00337 // cases that require to look for a while 00338 default: 00339 { 00340 // check first character 00341 if (!iswalnum(c) && (c != '_')) 00342 throw TranslatableError(pos, ERROR_INVALID_IDENTIFIER).arg((unsigned)c, 0, 16); 00343 00344 // get a string 00345 std::wstring s; 00346 s += c; 00347 wchar_t nextC = source.peek(); 00348 int posIncrement = 0; 00349 while ((source.good()) && (iswalnum(nextC) || (nextC == '_') || (nextC == '.'))) 00350 { 00351 s += nextC; 00352 source.get(); 00353 posIncrement++; 00354 nextC = source.peek(); 00355 } 00356 00357 // we now have a string, let's check what it is 00358 if (std::isdigit(s[0])) 00359 { 00360 // check if hex or binary 00361 if ((s.length() > 1) && (s[0] == '0') && (!std::isdigit(s[1]))) 00362 { 00363 // check if we have a valid number 00364 if (s[1] == 'x') 00365 { 00366 for (unsigned i = 2; i < s.size(); i++) 00367 if (!std::isxdigit(s[i])) 00368 throw TranslatableError(pos, ERROR_INVALID_HEXA_NUMBER); 00369 } 00370 else if (s[1] == 'b') 00371 { 00372 for (unsigned i = 2; i < s.size(); i++) 00373 if ((s[i] != '0') && (s[i] != '1')) 00374 throw TranslatableError(pos, ERROR_INVALID_BINARY_NUMBER); 00375 } 00376 else 00377 throw TranslatableError(pos, ERROR_NUMBER_INVALID_BASE); 00378 00379 } 00380 else 00381 { 00382 // check if we have a valid number 00383 for (unsigned i = 1; i < s.size(); i++) 00384 if (!std::isdigit(s[i])) 00385 throw TranslatableError(pos, ERROR_IN_NUMBER); 00386 } 00387 tokens.push_back(Token(Token::TOKEN_INT_LITERAL, pos, s)); 00388 } 00389 else 00390 { 00391 // check if it is a known keyword 00392 if (s == L"when") 00393 tokens.push_back(Token(Token::TOKEN_STR_when, pos)); 00394 else if (s == L"emit") 00395 tokens.push_back(Token(Token::TOKEN_STR_emit, pos)); 00396 else if (s == L"for") 00397 tokens.push_back(Token(Token::TOKEN_STR_for, pos)); 00398 else if (s == L"in") 00399 tokens.push_back(Token(Token::TOKEN_STR_in, pos)); 00400 else if (s == L"step") 00401 tokens.push_back(Token(Token::TOKEN_STR_step, pos)); 00402 else if (s == L"while") 00403 tokens.push_back(Token(Token::TOKEN_STR_while, pos)); 00404 else if (s == L"do") 00405 tokens.push_back(Token(Token::TOKEN_STR_do, pos)); 00406 else if (s == L"if") 00407 tokens.push_back(Token(Token::TOKEN_STR_if, pos)); 00408 else if (s == L"then") 00409 tokens.push_back(Token(Token::TOKEN_STR_then, pos)); 00410 else if (s == L"else") 00411 tokens.push_back(Token(Token::TOKEN_STR_else, pos)); 00412 else if (s == L"elseif") 00413 tokens.push_back(Token(Token::TOKEN_STR_elseif, pos)); 00414 else if (s == L"end") 00415 tokens.push_back(Token(Token::TOKEN_STR_end, pos)); 00416 else if (s == L"var") 00417 tokens.push_back(Token(Token::TOKEN_STR_var, pos)); 00418 else if (s == L"call") 00419 tokens.push_back(Token(Token::TOKEN_STR_call, pos)); 00420 else if (s == L"sub") 00421 tokens.push_back(Token(Token::TOKEN_STR_sub, pos)); 00422 else if (s == L"callsub") 00423 tokens.push_back(Token(Token::TOKEN_STR_callsub, pos)); 00424 else if (s == L"onevent") 00425 tokens.push_back(Token(Token::TOKEN_STR_onevent, pos)); 00426 else if (s == L"abs") 00427 tokens.push_back(Token(Token::TOKEN_STR_abs, pos)); 00428 else if (s == L"return") 00429 tokens.push_back(Token(Token::TOKEN_STR_return, pos)); 00430 else if (s == L"or") 00431 tokens.push_back(Token(Token::TOKEN_OP_OR, pos)); 00432 else if (s == L"and") 00433 tokens.push_back(Token(Token::TOKEN_OP_AND, pos)); 00434 else if (s == L"not") 00435 tokens.push_back(Token(Token::TOKEN_OP_NOT, pos)); 00436 else 00437 tokens.push_back(Token(Token::TOKEN_STRING_LITERAL, pos, s)); 00438 } 00439 00440 pos.column += posIncrement; 00441 pos.character += posIncrement; 00442 } 00443 break; 00444 } // switch (c) 00445 } // while (source.good()) 00446 00447 tokens.push_back(Token(Token::TOKEN_END_OF_STREAM, pos)); 00448 } 00449 00450 wchar_t Compiler::getNextCharacter(std::wistream &source, SourcePos &pos) 00451 { 00452 pos.column++; 00453 pos.character++; 00454 return source.get(); 00455 } 00456 00457 bool Compiler::testNextCharacter(std::wistream &source, SourcePos &pos, wchar_t test, Token::Type tokenIfTrue) 00458 { 00459 if (source.peek() == int(test)) 00460 { 00461 tokens.push_back(Token(tokenIfTrue, pos)); 00462 getNextCharacter(source, pos); 00463 return true; 00464 } 00465 return false; 00466 } 00467 00469 void Compiler::dumpTokens(std::wostream &dest) const 00470 { 00471 for (unsigned i = 0; i < tokens.size(); i++) 00472 dest << tokens[i].toWString() << std::endl; 00473 } 00474 }; // Aseba