00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "compiler.h"
00022 #include "../utils/FormatableString.h"
00023 #include <cstdlib>
00024 #include <sstream>
00025 #include <ostream>
00026 #include <cctype>
00027 #include <cstdio>
00028
00029 namespace Aseba
00030 {
00032 Compiler::Token::Token(Type type, SourcePos pos, const std::wstring& value) :
00033 type(type),
00034 sValue(value),
00035 pos(pos)
00036 {
00037 if (type == TOKEN_INT_LITERAL)
00038 {
00039 bool wasSigned = false;
00040 if ((value.length() > 1) && (value[1] == 'x'))
00041 iValue = wcstol(value.c_str() + 2, NULL, 16);
00042 else if ((value.length() > 1) && (value[1] == 'b'))
00043 iValue = wcstol(value.c_str() + 2, NULL, 2);
00044 else
00045 {
00046 iValue = wcstol(value.c_str(), NULL, 10);
00047 wasSigned = true;
00048 }
00049 if ((wasSigned == false) && (iValue > 32767))
00050 iValue -= 65536;
00051 }
00052 else
00053 iValue = 0;
00054 pos.column--;
00055 pos.character--;
00056 }
00057
00059 const std::wstring Compiler::Token::typeName() const
00060 {
00061 switch (type)
00062 {
00063 case TOKEN_END_OF_STREAM: return translate(ERROR_TOKEN_END_OF_STREAM);
00064 case TOKEN_STR_when: return translate(ERROR_TOKEN_STR_when);
00065 case TOKEN_STR_emit: return translate(ERROR_TOKEN_STR_emit);
00066 case TOKEN_STR_for: return translate(ERROR_TOKEN_STR_for);
00067 case TOKEN_STR_in: return translate(ERROR_TOKEN_STR_in);
00068 case TOKEN_STR_step: return translate(ERROR_TOKEN_STR_step);
00069 case TOKEN_STR_while: return translate(ERROR_TOKEN_STR_while);
00070 case TOKEN_STR_do: return translate(ERROR_TOKEN_STR_do);
00071 case TOKEN_STR_if: return translate(ERROR_TOKEN_STR_if);
00072 case TOKEN_STR_then: return translate(ERROR_TOKEN_STR_then);
00073 case TOKEN_STR_else: return translate(ERROR_TOKEN_STR_else);
00074 case TOKEN_STR_elseif: return translate(ERROR_TOKEN_STR_elseif);
00075 case TOKEN_STR_end: return translate(ERROR_TOKEN_STR_end);
00076 case TOKEN_STR_var: return translate(ERROR_TOKEN_STR_var);
00077 case TOKEN_STR_call: return translate(ERROR_TOKEN_STR_call);
00078 case TOKEN_STR_sub: return translate(ERROR_TOKEN_STR_sub);
00079 case TOKEN_STR_callsub: return translate(ERROR_TOKEN_STR_callsub);
00080 case TOKEN_STR_onevent: return translate(ERROR_TOKEN_STR_onevent);
00081 case TOKEN_STR_abs: return translate(ERROR_TOKEN_STR_abs);
00082 case TOKEN_STR_return: return translate(ERROR_TOKEN_STR_return);
00083 case TOKEN_STRING_LITERAL: return translate(ERROR_TOKEN_STRING_LITERAL);
00084 case TOKEN_INT_LITERAL: return translate(ERROR_TOKEN_INT_LITERAL);
00085 case TOKEN_PAR_OPEN: return translate(ERROR_TOKEN_PAR_OPEN);
00086 case TOKEN_PAR_CLOSE: return translate(ERROR_TOKEN_PAR_CLOSE);
00087 case TOKEN_BRACKET_OPEN: return translate(ERROR_TOKEN_BRACKET_OPEN);
00088 case TOKEN_BRACKET_CLOSE: return translate(ERROR_TOKEN_BRACKET_CLOSE);
00089 case TOKEN_COLON: return translate(ERROR_TOKEN_COLON);
00090 case TOKEN_COMMA: return translate(ERROR_TOKEN_COMMA);
00091 case TOKEN_ASSIGN: return translate(ERROR_TOKEN_ASSIGN);
00092 case TOKEN_OP_OR: return translate(ERROR_TOKEN_OP_OR);
00093 case TOKEN_OP_AND: return translate(ERROR_TOKEN_OP_AND);
00094 case TOKEN_OP_NOT: return translate(ERROR_TOKEN_OP_NOT);
00095 case TOKEN_OP_BIT_OR: return translate(ERROR_TOKEN_OP_BIT_OR);
00096 case TOKEN_OP_BIT_XOR: return translate(ERROR_TOKEN_OP_BIT_XOR);
00097 case TOKEN_OP_BIT_AND: return translate(ERROR_TOKEN_OP_BIT_AND);
00098 case TOKEN_OP_BIT_NOT: return translate(ERROR_TOKEN_OP_BIT_NOT);
00099 case TOKEN_OP_BIT_OR_EQUAL: return translate(ERROR_TOKEN_OP_BIT_OR_EQUAL);
00100 case TOKEN_OP_BIT_XOR_EQUAL: return translate(ERROR_TOKEN_OP_BIT_XOR_EQUAL);
00101 case TOKEN_OP_BIT_AND_EQUAL: return translate(ERROR_TOKEN_OP_BIT_AND_EQUAL);
00102 case TOKEN_OP_EQUAL: return translate(ERROR_TOKEN_OP_EQUAL);
00103 case TOKEN_OP_NOT_EQUAL: return translate(ERROR_TOKEN_OP_NOT_EQUAL);
00104 case TOKEN_OP_BIGGER: return translate(ERROR_TOKEN_OP_BIGGER);
00105 case TOKEN_OP_BIGGER_EQUAL: return translate(ERROR_TOKEN_OP_BIGGER_EQUAL);
00106 case TOKEN_OP_SMALLER: return translate(ERROR_TOKEN_OP_SMALLER);
00107 case TOKEN_OP_SMALLER_EQUAL: return translate(ERROR_TOKEN_OP_SMALLER_EQUAL);
00108 case TOKEN_OP_SHIFT_LEFT: return translate(ERROR_TOKEN_OP_SHIFT_LEFT);
00109 case TOKEN_OP_SHIFT_RIGHT: return translate(ERROR_TOKEN_OP_SHIFT_RIGHT);
00110 case TOKEN_OP_SHIFT_LEFT_EQUAL: return translate(ERROR_TOKEN_OP_SHIFT_LEFT_EQUAL);
00111 case TOKEN_OP_SHIFT_RIGHT_EQUAL: return translate(ERROR_TOKEN_OP_SHIFT_RIGHT_EQUAL);
00112 case TOKEN_OP_ADD: return translate(ERROR_TOKEN_OP_ADD);
00113 case TOKEN_OP_NEG: return translate(ERROR_TOKEN_OP_NEG);
00114 case TOKEN_OP_ADD_EQUAL: return translate(ERROR_TOKEN_OP_ADD_EQUAL);
00115 case TOKEN_OP_NEG_EQUAL: return translate(ERROR_TOKEN_OP_NEG_EQUAL);
00116 case TOKEN_OP_PLUS_PLUS: return translate(ERROR_TOKEN_OP_PLUS_PLUS);
00117 case TOKEN_OP_MINUS_MINUS: return translate(ERROR_TOKEN_OP_MINUS_MINUS);
00118 case TOKEN_OP_MULT: return translate(ERROR_TOKEN_OP_MULT);
00119 case TOKEN_OP_DIV: return translate(ERROR_TOKEN_OP_DIV);
00120 case TOKEN_OP_MOD: return translate(ERROR_TOKEN_OP_MOD);
00121 case TOKEN_OP_MULT_EQUAL: return translate(ERROR_TOKEN_OP_MULT_EQUAL);
00122 case TOKEN_OP_DIV_EQUAL: return translate(ERROR_TOKEN_OP_DIV_EQUAL);
00123 case TOKEN_OP_MOD_EQUAL: return translate(ERROR_TOKEN_OP_MOD_EQUAL);
00124 default: return translate(ERROR_TOKEN_UNKNOWN);
00125 }
00126 }
00127
00129 std::wstring Compiler::Token::toWString() const
00130 {
00131 std::wostringstream oss;
00132 oss << translate(ERROR_LINE) << pos.row + 1 << translate(ERROR_COL) << pos.column + 1 << L" : ";
00133 oss << typeName();
00134 if (type == TOKEN_INT_LITERAL)
00135 oss << L" : " << iValue;
00136 if (type == TOKEN_STRING_LITERAL)
00137 oss << L" : " << sValue;
00138 return oss.str();
00139 }
00140
00141
00144 void Compiler::tokenize(std::wistream& source)
00145 {
00146 tokens.clear();
00147 SourcePos pos(0, 0, 0);
00148 const unsigned tabSize = 4;
00149
00150
00151 while (source.good())
00152 {
00153 wchar_t c = source.get();
00154
00155 if (source.eof())
00156 break;
00157
00158 pos.column++;
00159 pos.character++;
00160
00161 switch (c)
00162 {
00163
00164 case ' ': break;
00165
00166 case '\t': break;
00167 case '\n': pos.row++; pos.column = -1; break;
00168 case '\r': pos.column = -1; break;
00169 case '(': tokens.push_back(Token(Token::TOKEN_PAR_OPEN, pos)); break;
00170 case ')': tokens.push_back(Token(Token::TOKEN_PAR_CLOSE, pos)); break;
00171 case '[': tokens.push_back(Token(Token::TOKEN_BRACKET_OPEN, pos)); break;
00172 case ']': tokens.push_back(Token(Token::TOKEN_BRACKET_CLOSE, pos)); break;
00173 case ':': tokens.push_back(Token(Token::TOKEN_COLON, pos)); break;
00174 case ',': tokens.push_back(Token(Token::TOKEN_COMMA, pos)); break;
00175
00176
00177 case '#':
00178 {
00179
00180 if (source.peek() == '*')
00181 {
00182
00183
00184 SourcePos begin(pos);
00185
00186 int step = 2;
00187 while ((step > 0) || (c != '*') || (source.peek() != '#'))
00188 {
00189 if (step)
00190 step--;
00191
00192 if (c == '\t')
00193 pos.column += tabSize;
00194 else if (c == '\n')
00195 {
00196 pos.row++;
00197 pos.column = 0;
00198 }
00199 else
00200 pos.column++;
00201 c = source.get();
00202 pos.character++;
00203 if (source.eof())
00204 {
00205
00206 throw TranslatableError(begin, ERROR_UNBALANCED_COMMENT_BLOCK);
00207 }
00208 }
00209
00210 getNextCharacter(source, pos);
00211 }
00212 else
00213 {
00214
00215 while ((c != '\n') && (c != '\r') && (!source.eof()))
00216 {
00217 if (c == '\t')
00218 pos.column += tabSize;
00219 else
00220 pos.column++;
00221 c = source.get();
00222 pos.character++;
00223 }
00224 if (c == '\n')
00225 {
00226 pos.row++;
00227 pos.column = 0;
00228 }
00229 else if (c == '\r')
00230 pos.column = 0;
00231 }
00232 }
00233 break;
00234
00235
00236 case '+':
00237 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_ADD_EQUAL))
00238 break;
00239 if (testNextCharacter(source, pos, '+', Token::TOKEN_OP_PLUS_PLUS))
00240 break;
00241 tokens.push_back(Token(Token::TOKEN_OP_ADD, pos));
00242 break;
00243
00244 case '-':
00245 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NEG_EQUAL))
00246 break;
00247 if (testNextCharacter(source, pos, '-', Token::TOKEN_OP_MINUS_MINUS))
00248 break;
00249 tokens.push_back(Token(Token::TOKEN_OP_NEG, pos));
00250 break;
00251
00252 case '*':
00253 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MULT_EQUAL))
00254 break;
00255 tokens.push_back(Token(Token::TOKEN_OP_MULT, pos));
00256 break;
00257
00258 case '/':
00259 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_DIV_EQUAL))
00260 break;
00261 tokens.push_back(Token(Token::TOKEN_OP_DIV, pos));
00262 break;
00263
00264 case '%':
00265 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_MOD_EQUAL))
00266 break;
00267 tokens.push_back(Token(Token::TOKEN_OP_MOD, pos));
00268 break;
00269
00270 case '|':
00271 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_OR_EQUAL))
00272 break;
00273 tokens.push_back(Token(Token::TOKEN_OP_BIT_OR, pos));
00274 break;
00275
00276 case '^':
00277 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_XOR_EQUAL))
00278 break;
00279 tokens.push_back(Token(Token::TOKEN_OP_BIT_XOR, pos));
00280 break;
00281
00282 case '&':
00283 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIT_AND_EQUAL))
00284 break;
00285 tokens.push_back(Token(Token::TOKEN_OP_BIT_AND, pos));
00286 break;
00287
00288 case '~':
00289 tokens.push_back(Token(Token::TOKEN_OP_BIT_NOT, pos));
00290 break;
00291
00292 case '!':
00293 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_NOT_EQUAL))
00294 break;
00295 throw TranslatableError(pos, ERROR_SYNTAX);
00296 break;
00297
00298 case '=':
00299 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_EQUAL))
00300 break;
00301 tokens.push_back(Token(Token::TOKEN_ASSIGN, pos));
00302 break;
00303
00304
00305 case '<':
00306 if (source.peek() == '<')
00307 {
00308
00309 getNextCharacter(source, pos);
00310 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_LEFT_EQUAL))
00311 break;
00312 tokens.push_back(Token(Token::TOKEN_OP_SHIFT_LEFT, pos));
00313 break;
00314 }
00315
00316 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SMALLER_EQUAL))
00317 break;
00318 tokens.push_back(Token(Token::TOKEN_OP_SMALLER, pos));
00319 break;
00320
00321 case '>':
00322 if (source.peek() == '>')
00323 {
00324
00325 getNextCharacter(source, pos);
00326 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_SHIFT_RIGHT_EQUAL))
00327 break;
00328 tokens.push_back(Token(Token::TOKEN_OP_SHIFT_RIGHT, pos));
00329 break;
00330 }
00331
00332 if (testNextCharacter(source, pos, '=', Token::TOKEN_OP_BIGGER_EQUAL))
00333 break;
00334 tokens.push_back(Token(Token::TOKEN_OP_BIGGER, pos));
00335 break;
00336
00337
00338 default:
00339 {
00340
00341 if (!iswalnum(c) && (c != '_'))
00342 throw TranslatableError(pos, ERROR_INVALID_IDENTIFIER).arg((unsigned)c, 0, 16);
00343
00344
00345 std::wstring s;
00346 s += c;
00347 wchar_t nextC = source.peek();
00348 int posIncrement = 0;
00349 while ((source.good()) && (iswalnum(nextC) || (nextC == '_') || (nextC == '.')))
00350 {
00351 s += nextC;
00352 source.get();
00353 posIncrement++;
00354 nextC = source.peek();
00355 }
00356
00357
00358 if (std::isdigit(s[0]))
00359 {
00360
00361 if ((s.length() > 1) && (s[0] == '0') && (!std::isdigit(s[1])))
00362 {
00363
00364 if (s[1] == 'x')
00365 {
00366 for (unsigned i = 2; i < s.size(); i++)
00367 if (!std::isxdigit(s[i]))
00368 throw TranslatableError(pos, ERROR_INVALID_HEXA_NUMBER);
00369 }
00370 else if (s[1] == 'b')
00371 {
00372 for (unsigned i = 2; i < s.size(); i++)
00373 if ((s[i] != '0') && (s[i] != '1'))
00374 throw TranslatableError(pos, ERROR_INVALID_BINARY_NUMBER);
00375 }
00376 else
00377 throw TranslatableError(pos, ERROR_NUMBER_INVALID_BASE);
00378
00379 }
00380 else
00381 {
00382
00383 for (unsigned i = 1; i < s.size(); i++)
00384 if (!std::isdigit(s[i]))
00385 throw TranslatableError(pos, ERROR_IN_NUMBER);
00386 }
00387 tokens.push_back(Token(Token::TOKEN_INT_LITERAL, pos, s));
00388 }
00389 else
00390 {
00391
00392 if (s == L"when")
00393 tokens.push_back(Token(Token::TOKEN_STR_when, pos));
00394 else if (s == L"emit")
00395 tokens.push_back(Token(Token::TOKEN_STR_emit, pos));
00396 else if (s == L"for")
00397 tokens.push_back(Token(Token::TOKEN_STR_for, pos));
00398 else if (s == L"in")
00399 tokens.push_back(Token(Token::TOKEN_STR_in, pos));
00400 else if (s == L"step")
00401 tokens.push_back(Token(Token::TOKEN_STR_step, pos));
00402 else if (s == L"while")
00403 tokens.push_back(Token(Token::TOKEN_STR_while, pos));
00404 else if (s == L"do")
00405 tokens.push_back(Token(Token::TOKEN_STR_do, pos));
00406 else if (s == L"if")
00407 tokens.push_back(Token(Token::TOKEN_STR_if, pos));
00408 else if (s == L"then")
00409 tokens.push_back(Token(Token::TOKEN_STR_then, pos));
00410 else if (s == L"else")
00411 tokens.push_back(Token(Token::TOKEN_STR_else, pos));
00412 else if (s == L"elseif")
00413 tokens.push_back(Token(Token::TOKEN_STR_elseif, pos));
00414 else if (s == L"end")
00415 tokens.push_back(Token(Token::TOKEN_STR_end, pos));
00416 else if (s == L"var")
00417 tokens.push_back(Token(Token::TOKEN_STR_var, pos));
00418 else if (s == L"call")
00419 tokens.push_back(Token(Token::TOKEN_STR_call, pos));
00420 else if (s == L"sub")
00421 tokens.push_back(Token(Token::TOKEN_STR_sub, pos));
00422 else if (s == L"callsub")
00423 tokens.push_back(Token(Token::TOKEN_STR_callsub, pos));
00424 else if (s == L"onevent")
00425 tokens.push_back(Token(Token::TOKEN_STR_onevent, pos));
00426 else if (s == L"abs")
00427 tokens.push_back(Token(Token::TOKEN_STR_abs, pos));
00428 else if (s == L"return")
00429 tokens.push_back(Token(Token::TOKEN_STR_return, pos));
00430 else if (s == L"or")
00431 tokens.push_back(Token(Token::TOKEN_OP_OR, pos));
00432 else if (s == L"and")
00433 tokens.push_back(Token(Token::TOKEN_OP_AND, pos));
00434 else if (s == L"not")
00435 tokens.push_back(Token(Token::TOKEN_OP_NOT, pos));
00436 else
00437 tokens.push_back(Token(Token::TOKEN_STRING_LITERAL, pos, s));
00438 }
00439
00440 pos.column += posIncrement;
00441 pos.character += posIncrement;
00442 }
00443 break;
00444 }
00445 }
00446
00447 tokens.push_back(Token(Token::TOKEN_END_OF_STREAM, pos));
00448 }
00449
00450 wchar_t Compiler::getNextCharacter(std::wistream &source, SourcePos &pos)
00451 {
00452 pos.column++;
00453 pos.character++;
00454 return source.get();
00455 }
00456
00457 bool Compiler::testNextCharacter(std::wistream &source, SourcePos &pos, wchar_t test, Token::Type tokenIfTrue)
00458 {
00459 if (source.peek() == int(test))
00460 {
00461 tokens.push_back(Token(tokenIfTrue, pos));
00462 getNextCharacter(source, pos);
00463 return true;
00464 }
00465 return false;
00466 }
00467
00469 void Compiler::dumpTokens(std::wostream &dest) const
00470 {
00471 for (unsigned i = 0; i < tokens.size(); i++)
00472 dest << tokens[i].toWString() << std::endl;
00473 }
00474 };