00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #include "compiler.h"
00025 #include "../common/consts.h"
00026 #include "tree.h"
00027 #include <cassert>
00028 #include <cstdlib>
00029 #include <sstream>
00030 #include <iostream>
00031 #include <fstream>
00032 #include <iomanip>
00033 #include <memory>
00034
00035 namespace Aseba
00036 {
00039
00041 std::string SourcePos::toString() const
00042 {
00043 if (valid)
00044 {
00045 std::ostringstream oss;
00046 oss << row + 1 << ":" << column;
00047 return oss.str();
00048 }
00049 else
00050 return "";
00051 }
00052
00053 bool NamedValuesVector::contains(const std::string& s, size_t* position) const
00054 {
00055 for (size_t i = 0; i < size(); i++)
00056 {
00057 if ((*this)[i].name == s)
00058 {
00059 if (position)
00060 *position = i;
00061 return true;
00062 }
00063 }
00064 return false;
00065 }
00066
00068 Compiler::Compiler()
00069 {
00070 targetDescription = 0;
00071 commonDefinitions = 0;
00072 }
00073
00075 void Compiler::setTargetDescription(const TargetDescription *description)
00076 {
00077 targetDescription = description;
00078 buildMaps();
00079 }
00080
00082 void Compiler::setCommonDefinitions(const CommonDefinitions *definitions)
00083 {
00084 commonDefinitions = definitions;
00085 }
00086
00094 bool Compiler::compile(std::istream& source, BytecodeVector& bytecode, unsigned& allocatedVariablesCount, Error &errorDescription, std::ostream* dump)
00095 {
00096 assert(targetDescription);
00097
00098 Node *program;
00099 unsigned indent = 0;
00100
00101
00102 buildMaps();
00103 if (freeVariableIndex > targetDescription->variablesSize)
00104 {
00105 errorDescription = Error(SourcePos(), "Broken target description: not enough room for internal variables");
00106 return false;
00107 }
00108
00109
00110 try
00111 {
00112 tokenize(source);
00113 }
00114 catch (Error error)
00115 {
00116 errorDescription = error;
00117 return false;
00118 }
00119
00120 if (dump)
00121 {
00122 *dump << "Dumping tokens:\n";
00123 dumpTokens(*dump);
00124 *dump << "\n\n";
00125 }
00126
00127
00128 try
00129 {
00130 program = parseProgram();
00131 }
00132 catch (Error error)
00133 {
00134 errorDescription = error;
00135 return false;
00136 }
00137
00138 if (dump)
00139 {
00140 *dump << "Syntax tree before optimisation:\n";
00141 program->dump(*dump, indent);
00142 *dump << "\n\n";
00143 *dump << "Type checking:\n";
00144 }
00145
00146
00147 try
00148 {
00149 program->typeCheck();
00150 }
00151 catch(Error error)
00152 {
00153 delete program;
00154 errorDescription = error;
00155 return false;
00156 }
00157
00158 if (dump)
00159 {
00160 *dump << "correct.\n";
00161 *dump << "\n\n";
00162 *dump << "Optimizations:\n";
00163 }
00164
00165
00166 try
00167 {
00168 program = program->optimize(dump);
00169 }
00170 catch (Error error)
00171 {
00172 delete program;
00173 errorDescription = error;
00174 return false;
00175 }
00176
00177 if (dump)
00178 {
00179 *dump << "\n\n";
00180 *dump << "Syntax tree after optimization:\n";
00181 program->dump(*dump, indent);
00182 *dump << "\n\n";
00183 }
00184
00185
00186 allocatedVariablesCount = freeVariableIndex;
00187
00188
00189 PreLinkBytecode preLinkBytecode;
00190 program->emit(preLinkBytecode);
00191 delete program;
00192
00193
00194 preLinkBytecode.fixup(subroutineTable);
00195
00196
00197 if (!verifyStackCalls(preLinkBytecode))
00198 {
00199 errorDescription = Error(SourcePos(), "Execution stack will overflow, check for any recursive subroutine call and cut long mathematical expressions.");
00200 return false;
00201 }
00202
00203
00204 if (!link(preLinkBytecode, bytecode))
00205 {
00206 errorDescription = Error(SourcePos(), "Script too big for target bytecode size.");
00207 return false;
00208 }
00209
00210 if (dump)
00211 {
00212 *dump << "Bytecode:\n";
00213 disassemble(bytecode, preLinkBytecode, *dump);
00214 *dump << "\n\n";
00215 }
00216
00217 return true;
00218 }
00219
00221 bool Compiler::link(const PreLinkBytecode& preLinkBytecode, BytecodeVector& bytecode)
00222 {
00223 bytecode.clear();
00224
00225
00226 unsigned addr = preLinkBytecode.events.size() * 2 + 1;
00227 bytecode.push_back(addr);
00228
00229
00230 for (PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.begin();
00231 it != preLinkBytecode.events.end();
00232 ++it
00233 )
00234 {
00235 bytecode.push_back(it->first);
00236 bytecode.push_back(addr);
00237 addr += it->second.size();
00238 }
00239
00240
00241 for (PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.begin();
00242 it != preLinkBytecode.events.end();
00243 ++it
00244 )
00245 {
00246 std::copy(it->second.begin(), it->second.end(), std::back_inserter(bytecode));
00247 }
00248
00249
00250 for (PreLinkBytecode::SubroutinesBytecode::const_iterator it = preLinkBytecode.subroutines.begin();
00251 it != preLinkBytecode.subroutines.end();
00252 ++it
00253 )
00254 {
00255 subroutineTable[it->first].address = bytecode.size();
00256 std::copy(it->second.begin(), it->second.end(), std::back_inserter(bytecode));
00257 }
00258
00259
00260 for (size_t pc = 0; pc < bytecode.size();)
00261 {
00262 switch (bytecode[pc] >> 12)
00263 {
00264 case ASEBA_BYTECODE_SUB_CALL:
00265 {
00266 unsigned id = bytecode[pc] & 0x0fff;
00267 assert(id < subroutineTable.size());
00268 unsigned address = subroutineTable[id].address;
00269 bytecode[pc].bytecode &= 0xf000;
00270 bytecode[pc].bytecode |= address;
00271 pc += 1;
00272 }
00273 break;
00274
00275 case ASEBA_BYTECODE_LARGE_IMMEDIATE:
00276 case ASEBA_BYTECODE_LOAD_INDIRECT:
00277 case ASEBA_BYTECODE_STORE_INDIRECT:
00278 case ASEBA_BYTECODE_CONDITIONAL_BRANCH:
00279 pc += 2;
00280 break;
00281
00282 case ASEBA_BYTECODE_EMIT:
00283 pc += 3;
00284 break;
00285
00286 default:
00287 pc += 1;
00288 break;
00289 }
00290 }
00291
00292
00293 return bytecode.size() <= targetDescription->bytecodeSize;
00294 }
00295
00297 void Compiler::disassemble(BytecodeVector& bytecode, const PreLinkBytecode& preLinkBytecode, std::ostream& dump) const
00298 {
00299
00300 std::map<unsigned, unsigned> eventAddr;
00301 std::map<unsigned, unsigned> subroutinesAddr;
00302
00303
00304 for (size_t id = 0; id < subroutineTable.size(); ++id)
00305 subroutinesAddr[subroutineTable[id].address] = id;
00306
00307
00308 const unsigned eventVectSize = bytecode[0];
00309 unsigned pc = 1;
00310 while (pc < eventVectSize)
00311 {
00312 eventAddr[bytecode[pc + 1]] = bytecode[pc];
00313 pc += 2;
00314 }
00315 const unsigned eventCount = (eventVectSize - 1 ) / 2;
00316 const float fillPrecentage = float(bytecode.size() * 100.f) / float(targetDescription->bytecodeSize);
00317 dump << "Disassembling " << eventCount + subroutineTable.size() << " segments (" << bytecode.size() << " words on " << targetDescription->bytecodeSize << ", " << fillPrecentage << "% filled):\n";
00318
00319
00320 while (pc < bytecode.size())
00321 {
00322 if (eventAddr.find(pc) != eventAddr.end())
00323 {
00324 unsigned eventId = eventAddr[pc];
00325 if (eventId == ASEBA_EVENT_INIT)
00326 dump << "init: ";
00327 else
00328 {
00329 if (eventId < 0x1000)
00330 {
00331 if (eventId < commonDefinitions->events.size())
00332 dump << "event " << commonDefinitions->events[eventId].name << ": ";
00333 else
00334 dump << "unknown global event " << eventId << ": ";
00335 }
00336 else
00337 {
00338 int index = ASEBA_EVENT_LOCAL_EVENTS_START - eventId;
00339 if (index < (int)targetDescription->localEvents.size())
00340 dump << "event " << targetDescription->localEvents[index].name << ": ";
00341 else
00342 dump << "unknown local event " << index << ": ";
00343 }
00344 }
00345
00346 PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.find(eventId);
00347 assert(it != preLinkBytecode.events.end());
00348 dump << " (max stack " << it->second.maxStackDepth << ")\n";
00349 }
00350
00351 if (subroutinesAddr.find(pc) != subroutinesAddr.end())
00352 {
00353 PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.subroutines.find(subroutinesAddr[pc]);
00354 assert(it != preLinkBytecode.subroutines.end());
00355 dump << "sub " << subroutineTable[subroutinesAddr[pc]].name << ": (max stack " << it->second.maxStackDepth << ")\n";
00356 }
00357
00358 dump << " ";
00359 dump << pc << " (" << bytecode[pc].line << ") : ";
00360 switch (bytecode[pc] >> 12)
00361 {
00362 case ASEBA_BYTECODE_STOP:
00363 dump << "STOP\n";
00364 pc++;
00365 break;
00366
00367 case ASEBA_BYTECODE_SMALL_IMMEDIATE:
00368 dump << "SMALL_IMMEDIATE " << ((signed short)(bytecode[pc] << 4) >> 4) << "\n";
00369 pc++;
00370 break;
00371
00372 case ASEBA_BYTECODE_LARGE_IMMEDIATE:
00373 dump << "LARGE_IMMEDIATE " << ((signed short)bytecode[pc+1]) << "\n";
00374 pc += 2;
00375 break;
00376
00377 case ASEBA_BYTECODE_LOAD:
00378 dump << "LOAD " << (bytecode[pc] & 0x0fff) << "\n";
00379 pc++;
00380 break;
00381
00382 case ASEBA_BYTECODE_STORE:
00383 dump << "STORE " << (bytecode[pc] & 0x0fff) << "\n";
00384 pc++;
00385 break;
00386
00387 case ASEBA_BYTECODE_LOAD_INDIRECT:
00388 dump << "LOAD_INDIRECT in array at " << (bytecode[pc] & 0x0fff) << " of size " << bytecode[pc+1] << "\n";
00389 pc += 2;
00390 break;
00391
00392 case ASEBA_BYTECODE_STORE_INDIRECT:
00393 dump << "STORE_INDIRECT in array at " << (bytecode[pc] & 0x0fff) << " of size " << bytecode[pc+1] << "\n";
00394 pc += 2;
00395 break;
00396
00397 case ASEBA_BYTECODE_UNARY_ARITHMETIC:
00398 dump << "UNARY_ARITHMETIC ";
00399 dump << unaryOperatorToString((AsebaUnaryOperator)(bytecode[pc] & ASEBA_UNARY_OPERATOR_MASK)) << "\n";
00400 pc++;
00401 break;
00402
00403 case ASEBA_BYTECODE_BINARY_ARITHMETIC:
00404 dump << "BINARY_ARITHMETIC ";
00405 dump << binaryOperatorToString((AsebaBinaryOperator)(bytecode[pc] & ASEBA_BINARY_OPERATOR_MASK)) << "\n";
00406 pc++;
00407 break;
00408
00409 case ASEBA_BYTECODE_JUMP:
00410 dump << "JUMP " << ((signed short)(bytecode[pc] << 4) >> 4) << "\n";
00411 pc++;
00412 break;
00413
00414 case ASEBA_BYTECODE_CONDITIONAL_BRANCH:
00415 dump << "CONDITIONAL_BRANCH ";
00416 dump << binaryOperatorToString((AsebaBinaryOperator)(bytecode[pc] & ASEBA_BINARY_OPERATOR_MASK));
00417 if (bytecode[pc] & (1 << ASEBA_IF_IS_WHEN_BIT))
00418 dump << " (edge), ";
00419 else
00420 dump << ", ";
00421 dump << "skip " << ((signed short)bytecode[pc+1]) << " if false" << "\n";
00422 pc += 2;
00423 break;
00424
00425 case ASEBA_BYTECODE_EMIT:
00426 {
00427 unsigned eventId = (bytecode[pc] & 0x0fff);
00428 dump << "EMIT ";
00429 if (eventId < commonDefinitions->events.size())
00430 dump << commonDefinitions->events[eventId].name;
00431 else
00432 dump << eventId;
00433 dump << " addr " << bytecode[pc+1] << " size " << bytecode[pc+2] << "\n";
00434 pc += 3;
00435 }
00436 break;
00437
00438 case ASEBA_BYTECODE_NATIVE_CALL:
00439 dump << "CALL " << (bytecode[pc] & 0x0fff) << "\n";
00440 pc++;
00441 break;
00442
00443 case ASEBA_BYTECODE_SUB_CALL:
00444 {
00445 unsigned address = (bytecode[pc] & 0x0fff);
00446 std::string name("unknown");
00447 for (size_t i = 0; i < subroutineTable.size(); i++)
00448 if (subroutineTable[i].address == address)
00449 name = subroutineTable[i].name;
00450 dump << "SUB_CALL to " << name << " @ " << address << "\n";
00451 pc++;
00452 }
00453 break;
00454
00455 case ASEBA_BYTECODE_SUB_RET:
00456 dump << "SUB_RET\n";
00457 pc++;
00458 break;
00459
00460 default:
00461 dump << "?\n";
00462 pc++;
00463 break;
00464 }
00465 }
00466 }
00467
00469 void Compiler::buildMaps()
00470 {
00471 assert(targetDescription);
00472 freeVariableIndex = 0;
00473
00474
00475 implementedEvents.clear();
00476 subroutineTable.clear();
00477 subroutineReverseTable.clear();
00478
00479
00480 variablesMap.clear();
00481 for (unsigned i = 0; i < targetDescription->namedVariables.size(); i++)
00482 {
00483 variablesMap[targetDescription->namedVariables[i].name] =
00484 std::make_pair(freeVariableIndex, targetDescription->namedVariables[i].size);
00485 freeVariableIndex += targetDescription->namedVariables[i].size;
00486 }
00487
00488
00489 functionsMap.clear();
00490 for (unsigned i = 0; i < targetDescription->nativeFunctions.size(); i++)
00491 {
00492 functionsMap[targetDescription->nativeFunctions[i].name] = i;
00493 }
00494 }
00495
00498 }