aseba: aseba: compiler.cpp Source File

00001 /*
00002         Aseba - an event-based framework for distributed robot control
00003         Copyright (C) 2007--2012:
00004                 Stephane Magnenat <stephane at magnenat dot net>
00005                 (http://stephane.magnenat.net)
00006                 and other contributors, see authors.txt for details
00007         
00008         This program is free software: you can redistribute it and/or modify
00009         it under the terms of the GNU Lesser General Public License as published
00010         by the Free Software Foundation, version 3 of the License.
00011         
00012         This program is distributed in the hope that it will be useful,
00013         but WITHOUT ANY WARRANTY; without even the implied warranty of
00014         MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015         GNU Lesser General Public License for more details.
00016         
00017         You should have received a copy of the GNU Lesser General Public License
00018         along with this program. If not, see <http://www.gnu.org/licenses/>.
00019 */
00020 
00021 #include "compiler.h"
00022 #include "tree.h"
00023 #include "errors_code.h"
00024 #include "../common/consts.h"
00025 #include "../utils/utils.h"
00026 #include "../utils/FormatableString.h"
00027 #include <cassert>
00028 #include <cstdlib>
00029 #include <sstream>
00030 #include <iostream>
00031 #include <fstream>
00032 #include <iomanip>
00033 #include <memory>
00034 #include <limits>
00035 
00036 namespace Aseba
00037 {
00040         
00042         uint16 TargetDescription::crc() const
00043         {
00044                 uint16 crc(0);
00045                 crc = crcXModem(crc, bytecodeSize);
00046                 crc = crcXModem(crc, variablesSize);
00047                 crc = crcXModem(crc, stackSize);
00048                 for (size_t i = 0; i < namedVariables.size(); ++i)
00049                 {
00050                         crc = crcXModem(crc, namedVariables[i].size);
00051                         crc = crcXModem(crc, namedVariables[i].name);
00052                 }
00053                 for (size_t i = 0; i < localEvents.size(); ++i)
00054                 {
00055                         crc = crcXModem(crc, localEvents[i].name);
00056                 }
00057                 for (size_t i = 0; i < nativeFunctions.size(); ++i)
00058                 {
00059                         crc = crcXModem(crc, nativeFunctions[i].name);
00060                         for (size_t j = 0; j < nativeFunctions[i].parameters.size(); ++j)
00061                         {
00062                                 crc = crcXModem(crc, nativeFunctions[i].parameters[j].size);
00063                                 crc = crcXModem(crc, nativeFunctions[i].parameters[j].name);
00064                         }
00065                 }
00066                 return crc;
00067         }
00068         
00070         unsigned BytecodeElement::getWordSize() const
00071         {
00072                 switch (bytecode >> 12)
00073                 {
00074                         case ASEBA_BYTECODE_LARGE_IMMEDIATE:
00075                         case ASEBA_BYTECODE_LOAD_INDIRECT:
00076                         case ASEBA_BYTECODE_STORE_INDIRECT:
00077                         case ASEBA_BYTECODE_CONDITIONAL_BRANCH:
00078                         return 2;
00079                         
00080                         case ASEBA_BYTECODE_EMIT:
00081                         return 3;
00082                         
00083                         default:
00084                         return 1;
00085                 }
00086         }
00087         
00089         Compiler::Compiler()
00090         {
00091                 targetDescription = 0;
00092                 commonDefinitions = 0;
00093                 TranslatableError::setTranslateCB(ErrorMessages::defaultCallback);
00094         }
00095         
00097         void Compiler::setTargetDescription(const TargetDescription *description)
00098         {
00099                 targetDescription = description;
00100         }
00101         
00103         void Compiler::setCommonDefinitions(const CommonDefinitions *definitions)
00104         {
00105                 commonDefinitions = definitions;
00106         }
00107         
00115         bool Compiler::compile(std::wistream& source, BytecodeVector& bytecode, unsigned& allocatedVariablesCount, Error &errorDescription, std::wostream* dump)
00116         {
00117                 assert(targetDescription);
00118                 assert(commonDefinitions);
00119                 
00120                 unsigned indent = 0;
00121                 
00122                 // we need to build maps at each compilation in case previous ones produced errors and messed maps up
00123                 buildMaps();
00124                 if (freeVariableIndex > targetDescription->variablesSize)
00125                 {
00126                         errorDescription = TranslatableError(SourcePos(), ERROR_BROKEN_TARGET).toError();
00127                         return false;
00128                 }
00129                 
00130                 // tokenization
00131                 try
00132                 {
00133                         tokenize(source);
00134                 }
00135                 catch (TranslatableError error)
00136                 {
00137                         errorDescription = error.toError();
00138                         return false;
00139                 }
00140                 
00141                 if (dump)
00142                 {
00143                         *dump << "Dumping tokens:\n";
00144                         dumpTokens(*dump);
00145                         *dump << "\n\n";
00146                 }
00147                 
00148                 // parsing
00149                 std::auto_ptr<Node> program;
00150                 try
00151                 {
00152                         program.reset(parseProgram());
00153                 }
00154                 catch (TranslatableError error)
00155                 {
00156                         errorDescription = error.toError();
00157                         return false;
00158                 }
00159                 
00160                 if (dump)
00161                 {
00162                         *dump << "Vectorial syntax tree:\n";
00163                         program->dump(*dump, indent);
00164                         *dump << "\n\n";
00165                         *dump << "Checking the vectors' size:\n";
00166                 }
00167                 
00168                 // check vectors' size
00169                 try
00170                 {
00171                         program->checkVectorSize();
00172                 }
00173                 catch(TranslatableError error)
00174                 {
00175                         errorDescription = error.toError();
00176                         return false;
00177                 }
00178 
00179                 if (dump)
00180                 {
00181                         *dump << "Ok\n";
00182                         *dump << "\n\n";
00183                         *dump << "Expanding the syntax tree...\n";
00184                 }
00185 
00186                 // expand the syntax tree to Aseba-like syntax
00187                 try
00188                 {
00189                         Node* expandedProgram(program->expandToAsebaTree(dump));
00190                         program.release();
00191                         program.reset(expandedProgram);
00192                 }
00193                 catch (TranslatableError error)
00194                 {
00195                         errorDescription = error.toError();
00196                         return false;
00197                 }
00198 
00199                 if (dump)
00200                 {
00201                         *dump << "Expanded syntax tree before optimisation:\n";
00202                         program->dump(*dump, indent);
00203                         *dump << "\n\n";
00204                         *dump << "Type checking:\n";
00205                 }
00206 
00207                 // typecheck
00208                 try
00209                 {
00210                         program->typeCheck();
00211                 }
00212                 catch(TranslatableError error)
00213                 {
00214                         errorDescription = error.toError();
00215                         return false;
00216                 }
00217                 
00218                 if (dump)
00219                 {
00220                         *dump << "correct.\n";
00221                         *dump << "\n\n";
00222                         *dump << "Optimizations:\n";
00223                 }
00224                 
00225                 // optimization
00226                 try
00227                 {
00228                         Node* optimizedProgram(program->optimize(dump));
00229                         program.release();
00230                         program.reset(optimizedProgram);
00231                 }
00232                 catch (TranslatableError error)
00233                 {
00234                         errorDescription = error.toError();
00235                         return false;
00236                 }
00237                 
00238                 if (dump)
00239                 {
00240                         *dump << "\n\n";
00241                         *dump << "Syntax tree after optimization:\n";
00242                         program->dump(*dump, indent);
00243                         *dump << "\n\n";
00244                 }
00245                 
00246                 // set the number of allocated variables
00247                 allocatedVariablesCount = freeVariableIndex;
00248                 
00249                 if (dump)
00250                 {
00251                         const float fillPercentage = float(allocatedVariablesCount * 100.f) / float(targetDescription->variablesSize);
00252                         *dump << "Using " << allocatedVariablesCount << " on " << targetDescription->variablesSize << " (" << fillPercentage << " %) words of variable space\n";
00253                         *dump << "\n\n";
00254                 }
00255                 
00256                 // code generation
00257                 PreLinkBytecode preLinkBytecode;
00258                 program->emit(preLinkBytecode);
00259                 
00260                 // fix-up (add of missing STOP and RET bytecodes at code generation)
00261                 preLinkBytecode.fixup(subroutineTable);
00262                 
00263                 // stack check
00264                 if (!verifyStackCalls(preLinkBytecode))
00265                 {
00266                         errorDescription = TranslatableError(SourcePos(), ERROR_STACK_OVERFLOW).toError();
00267                         return false;
00268                 }
00269                 
00270                 // linking (flattening of complex structure into linear vector)
00271                 if (!link(preLinkBytecode, bytecode))
00272                 {
00273                         errorDescription = TranslatableError(SourcePos(), ERROR_SCRIPT_TOO_BIG).toError();
00274                         return false;
00275                 }
00276                 
00277                 if (dump)
00278                 {
00279                         *dump << "Bytecode:\n";
00280                         disassemble(bytecode, preLinkBytecode, *dump);
00281                         *dump << "\n\n";
00282                 }
00283                 
00284                 return true;
00285         }
00286         
00288         bool Compiler::link(const PreLinkBytecode& preLinkBytecode, BytecodeVector& bytecode)
00289         {
00290                 bytecode.clear();
00291                 
00292                 // event vector table size
00293                 unsigned addr = preLinkBytecode.events.size() * 2 + 1;
00294                 bytecode.push_back(addr);
00295                 
00296                 // events
00297                 for (PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.begin();
00298                         it != preLinkBytecode.events.end();
00299                         ++it
00300                 )
00301                 {
00302                         bytecode.push_back(it->first);          // id
00303                         bytecode.push_back(addr);                       // addr
00304                         addr += it->second.size();                      // next bytecode addr
00305                 }
00306                 
00307                 // evPreLinkBytecode::ents bytecode
00308                 for (PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.begin();
00309                         it != preLinkBytecode.events.end();
00310                         ++it
00311                 )
00312                 {
00313                         std::copy(it->second.begin(), it->second.end(), std::back_inserter(bytecode));
00314                 }
00315                 
00316                 // subrountines bytecode
00317                 for (PreLinkBytecode::SubroutinesBytecode::const_iterator it = preLinkBytecode.subroutines.begin();
00318                         it != preLinkBytecode.subroutines.end();
00319                         ++it
00320                 )
00321                 {
00322                         subroutineTable[it->first].address = bytecode.size();
00323                         std::copy(it->second.begin(), it->second.end(), std::back_inserter(bytecode));
00324                 }
00325                 
00326                 // resolve subroutines call addresses
00327                 for (size_t pc = 0; pc < bytecode.size();)
00328                 {
00329                         BytecodeElement &element(bytecode[pc]);
00330                         if (element.bytecode >> 12 == ASEBA_BYTECODE_SUB_CALL)
00331                         {
00332                                 const unsigned id = element.bytecode & 0x0fff;
00333                                 assert(id < subroutineTable.size());
00334                                 const unsigned address = subroutineTable[id].address;
00335                                 element.bytecode &= 0xf000;
00336                                 element.bytecode |= address;
00337                         }
00338                         pc += element.getWordSize();
00339                 }
00340                 
00341                 // check size
00342                 return bytecode.size() <= targetDescription->bytecodeSize;
00343         }
00344         
00346         void BytecodeVector::changeStopToRetSub()
00347         {
00348                 const unsigned bytecodeEndPos(size());
00349                 for (unsigned pc = 0; pc < bytecodeEndPos;)
00350                 {
00351                         BytecodeElement &element((*this)[pc]);
00352                         if ((element.bytecode >> 12) == ASEBA_BYTECODE_STOP)
00353                                 element.bytecode = AsebaBytecodeFromId(ASEBA_BYTECODE_SUB_RET);
00354                         pc += element.getWordSize();
00355                 }
00356         }
00357         
00359         unsigned short BytecodeVector::getTypeOfLast() const
00360         {
00361                 unsigned short type(16); // invalid type
00362                 for (size_t pc = 0; pc < size();)
00363                 {
00364                         const BytecodeElement &element((*this)[pc]);
00365                         type = element.bytecode >> 12;
00366                         pc += element.getWordSize();
00367                 }
00368                 return type;
00369         }
00370         
00372         BytecodeVector::EventAddressesToIdsMap BytecodeVector::getEventAddressesToIds() const
00373         {
00374                 EventAddressesToIdsMap eventAddr;
00375                 const unsigned eventVectSize = (*this)[0];
00376                 unsigned pc = 1;
00377                 while (pc < eventVectSize)
00378                 {
00379                         eventAddr[(*this)[pc + 1]] = (*this)[pc];
00380                         pc += 2;
00381                 }
00382                 return eventAddr;
00383         }
00384         
00386         void Compiler::disassemble(BytecodeVector& bytecode, const PreLinkBytecode& preLinkBytecode, std::wostream& dump) const
00387         {
00388                 // address of threads and subroutines
00389                 const BytecodeVector::EventAddressesToIdsMap eventAddr(bytecode.getEventAddressesToIds());
00390                 std::map<unsigned, unsigned> subroutinesAddr;
00391                 
00392                 // build subroutine map
00393                 for (size_t id = 0; id < subroutineTable.size(); ++id)
00394                         subroutinesAddr[subroutineTable[id].address] = id;
00395                 
00396                 // event table
00397                 const unsigned eventCount = eventAddr.size();
00398                 const float fillPercentage = float(bytecode.size() * 100.f) / float(targetDescription->bytecodeSize);
00399                 dump << "Disassembling " << eventCount + subroutineTable.size() << " segments (" << bytecode.size() << " words on " << targetDescription->bytecodeSize << ", " << fillPercentage << "% filled):\n";
00400                 
00401                 // bytecode
00402                 unsigned pc = eventCount*2 + 1;
00403                 while (pc < bytecode.size())
00404                 {
00405                         if (eventAddr.find(pc) != eventAddr.end())
00406                         {
00407                                 const unsigned eventId = eventAddr.at(pc);
00408                                 if (eventId == ASEBA_EVENT_INIT)
00409                                         dump << "init:       ";
00410                                 else
00411                                 {
00412                                         if (eventId < 0x1000)
00413                                         {
00414                                                 if (eventId < commonDefinitions->events.size())
00415                                                         dump << "event " << commonDefinitions->events[eventId].name << ": ";
00416                                                 else
00417                                                         dump << "unknown global event " << eventId << ": ";
00418                                         }
00419                                         else
00420                                         {
00421                                                 int index = ASEBA_EVENT_LOCAL_EVENTS_START - eventId;
00422                                                 if (index < (int)targetDescription->localEvents.size())
00423                                                         dump << "event " << targetDescription->localEvents[index].name << ": ";
00424                                                 else
00425                                                         dump << "unknown local event " << index << ": ";
00426                                         }
00427                                 }
00428                                 
00429                                 PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.events.find(eventId);
00430                                 assert(it != preLinkBytecode.events.end());
00431                                 dump << " (max stack " << it->second.maxStackDepth << ")\n";
00432                         }
00433                         
00434                         if (subroutinesAddr.find(pc) != subroutinesAddr.end())
00435                         {
00436                                 PreLinkBytecode::EventsBytecode::const_iterator it = preLinkBytecode.subroutines.find(subroutinesAddr[pc]);
00437                                 assert(it != preLinkBytecode.subroutines.end());
00438                                 dump << "sub " << subroutineTable[subroutinesAddr[pc]].name << ": (max stack " << it->second.maxStackDepth << ")\n";
00439                         }
00440                         
00441                         dump << "    ";
00442                         dump << pc << " (" << bytecode[pc].line << ") : ";
00443                         switch (bytecode[pc] >> 12)
00444                         {
00445                                 case ASEBA_BYTECODE_STOP:
00446                                 dump << "STOP\n";
00447                                 pc++;
00448                                 break;
00449                                 
00450                                 case ASEBA_BYTECODE_SMALL_IMMEDIATE:
00451                                 dump << "SMALL_IMMEDIATE " << ((signed short)(bytecode[pc] << 4) >> 4) << "\n";
00452                                 pc++;
00453                                 break;
00454                                 
00455                                 case ASEBA_BYTECODE_LARGE_IMMEDIATE:
00456                                 dump << "LARGE_IMMEDIATE " << ((signed short)bytecode[pc+1]) << "\n";
00457                                 pc += 2;
00458                                 break;
00459                                 
00460                                 case ASEBA_BYTECODE_LOAD:
00461                                 dump << "LOAD " << (bytecode[pc] & 0x0fff) << "\n";
00462                                 pc++;
00463                                 break;
00464                                 
00465                                 case ASEBA_BYTECODE_STORE:
00466                                 dump << "STORE " << (bytecode[pc] & 0x0fff) << "\n";
00467                                 pc++;
00468                                 break;
00469                                 
00470                                 case ASEBA_BYTECODE_LOAD_INDIRECT:
00471                                 dump << "LOAD_INDIRECT in array at " << (bytecode[pc] & 0x0fff) << " of size " << bytecode[pc+1] << "\n";
00472                                 pc += 2;
00473                                 break;
00474                                 
00475                                 case ASEBA_BYTECODE_STORE_INDIRECT:
00476                                 dump << "STORE_INDIRECT in array at " << (bytecode[pc] & 0x0fff) << " of size " << bytecode[pc+1] << "\n";
00477                                 pc += 2;
00478                                 break;
00479                                 
00480                                 case ASEBA_BYTECODE_UNARY_ARITHMETIC:
00481                                 dump << "UNARY_ARITHMETIC ";
00482                                 dump << unaryOperatorToString((AsebaUnaryOperator)(bytecode[pc] & ASEBA_UNARY_OPERATOR_MASK)) << "\n";
00483                                 pc++;
00484                                 break;
00485                                 
00486                                 case ASEBA_BYTECODE_BINARY_ARITHMETIC:
00487                                 dump << "BINARY_ARITHMETIC ";
00488                                 dump << binaryOperatorToString((AsebaBinaryOperator)(bytecode[pc] & ASEBA_BINARY_OPERATOR_MASK)) << "\n";
00489                                 pc++;
00490                                 break;
00491                                 
00492                                 case ASEBA_BYTECODE_JUMP:
00493                                 dump << "JUMP " << ((signed short)(bytecode[pc] << 4) >> 4) << "\n";
00494                                 pc++;
00495                                 break;
00496                                 
00497                                 case ASEBA_BYTECODE_CONDITIONAL_BRANCH:
00498                                 dump << "CONDITIONAL_BRANCH ";
00499                                 dump << binaryOperatorToString((AsebaBinaryOperator)(bytecode[pc] & ASEBA_BINARY_OPERATOR_MASK));
00500                                 if (bytecode[pc] & (1 << ASEBA_IF_IS_WHEN_BIT))
00501                                         dump << " (edge), ";
00502                                 else
00503                                         dump << ", ";
00504                                 dump << "skip " << ((signed short)bytecode[pc+1]) << " if false" << "\n";
00505                                 pc += 2;
00506                                 break;
00507                                 
00508                                 case ASEBA_BYTECODE_EMIT:
00509                                 {
00510                                         unsigned eventId = (bytecode[pc] & 0x0fff);
00511                                         dump << "EMIT ";
00512                                         if (eventId < commonDefinitions->events.size())
00513                                                 dump << commonDefinitions->events[eventId].name;
00514                                         else
00515                                                 dump << eventId;
00516                                         dump << " addr " << bytecode[pc+1] << " size " << bytecode[pc+2] << "\n";
00517                                         pc += 3;
00518                                 }
00519                                 break;
00520                                 
00521                                 case ASEBA_BYTECODE_NATIVE_CALL:
00522                                 dump << "CALL " << (bytecode[pc] & 0x0fff) << "\n";
00523                                 pc++;
00524                                 break;
00525                                 
00526                                 case ASEBA_BYTECODE_SUB_CALL:
00527                                 {
00528                                         unsigned address = (bytecode[pc] & 0x0fff);
00529                                         std::wstring name(L"unknown");
00530                                         for (size_t i = 0; i < subroutineTable.size(); i++)
00531                                                 if (subroutineTable[i].address == address)
00532                                                         name = subroutineTable[i].name;
00533                                         dump << "SUB_CALL to " << name << " @ " << address << "\n";
00534                                         pc++;
00535                                 }
00536                                 break;
00537                                 
00538                                 case ASEBA_BYTECODE_SUB_RET:
00539                                 dump << "SUB_RET\n";
00540                                 pc++;
00541                                 break;
00542                                 
00543                                 default:
00544                                 dump << "?\n";
00545                                 pc++;
00546                                 break;
00547                         }
00548                 }
00549         }
00550         
00553 } // Aseba