EasyScanner.cpp
Go to the documentation of this file.
00001 
00006 #include <iostream>
00007 #include <cstdio>
00008 #include <cctype>
00009 #include <cstdlib>
00010 #include <cmath>
00011 #include <cstring>
00012 #include <boost/format.hpp>
00013 #include <errno.h>
00014 
00015 #include "EasyScanner.h"
00016 
00017 using namespace std;
00018 using namespace boost;
00019 using namespace hrp;
00020 
00021 
// Replacement for the 'strtod()' function in Visual C++.
// This is necessary because the implementation in VC++ 6.0 calls 'strlen()' on its input,
// which makes it too slow when the number sits inside a very long string buffer.
//
// The replacement parses "[+-]digits[.digits][(e|E)[+-]digits]".  It does not skip
// leading white space and does not accept hexadecimal, "inf" or "nan" forms.
//
// Like strtod(), it consumes the longest valid prefix: "1." yields 1.0 and stops after
// the dot, "1e" yields 1.0 and stops at the 'e'.  When no number can be read, 0.0 is
// returned and *endptr is set to nptr.  endptr may be null.
#ifdef _MSC_VER
static double mystrtod(const char* nptr, char** endptr)
{
    const char* p = nptr;
    double sign = +1.0;

    if(*p == '+'){
        ++p;
    } else if(*p == '-'){
        sign = -1.0;
        ++p;
    }

    // integer part
    double value = 0.0;
    bool hasDigits = false;
    while(isdigit((unsigned char)*p)){
        value = value * 10.0 + (*p - '0');
        ++p;
        hasDigits = true;
    }

    // fractional part; the dot is consumed only if there is a digit on either side of it
    if(*p == '.'){
        const char* q = p + 1;
        double scale = 0.1;
        bool hasFraction = false;
        while(isdigit((unsigned char)*q)){
            value += scale * (*q - '0');
            scale *= 0.1;
            ++q;
            hasFraction = true;
        }
        if(hasDigits || hasFraction){
            p = q;
            hasDigits = true;
        }
    }

    if(!hasDigits){
        if(endptr){
            *endptr = (char*)nptr;
        }
        return 0.0;
    }

    // exponent part; consumed only when at least one exponent digit follows,
    // otherwise the mantissa alone is the result and 'e' is left unread
    if(*p == 'e' || *p == 'E'){
        const char* q = p + 1;
        double expSign = +1.0;
        if(*q == '+'){
            ++q;
        } else if(*q == '-'){
            expSign = -1.0;
            ++q;
        }
        if(isdigit((unsigned char)*q)){
            double exponent = 0.0;
            do {
                exponent = exponent * 10.0 + (*q - '0');
                ++q;
            } while(isdigit((unsigned char)*q));
            value *= pow(10.0, expSign * exponent);
            p = q;
        }
    }

    if(endptr){
        *endptr = (char*)p;
    }
    return sign * value;
}
#else
static inline double mystrtod(const char* nptr, char** endptr) {
    return strtod(nptr, endptr);
}
#endif
00091 
00092 
00093 std::string EasyScanner::Exception::getFullMessage()
00094 {
00095     string m(message);
00096     
00097     if(lineNumber > 0){
00098         m += str(format(" at line %1%") % lineNumber);
00099     }
00100         
00101     if(!filename.empty()){
00102         m += str(format(" of %1%") % filename);
00103     }
00104     
00105     return m;
00106 }
00107 
00108 
00109 EasyScanner::EasyScanner()
00110 {
00111     init();
00112 }
00113 
00114 
00118 EasyScanner::EasyScanner(string filename)
00119 {
00120     init();
00121     loadFile(filename);
00122 }
00123 
00124 
00125 void EasyScanner::init()
00126 {
00127     textBuf = 0;
00128     size = 0;
00129     textBufEnd = 0;
00130     lineNumberOffset = 1;
00131     
00132     commentChar = '#';
00133     quoteChar = 0xffff;
00134     isLineOriented = true;
00135     defaultErrorMessage = "unknown error of the lexical scanner";
00136 
00137     whiteSpaceChars.push_back(' ');
00138     whiteSpaceChars.push_back('\t');
00139 
00140     symbols.reset(new SymbolMap());
00141 }
00142 
00143 
00149 EasyScanner::EasyScanner(const EasyScanner& org, bool copyText) :
00150     whiteSpaceChars(org.whiteSpaceChars)
00151 {
00152     commentChar = org.commentChar;
00153     quoteChar = org.quoteChar;
00154     isLineOriented = org.isLineOriented;
00155     filename = org.filename;
00156     defaultErrorMessage = org.defaultErrorMessage;
00157     lineNumber = org.lineNumber;
00158     lineNumberOffset = org.lineNumberOffset;
00159 
00160     symbols = org.symbols;
00161 
00162     if(copyText && org.textBuf){
00163         size = org.size;
00164         textBuf = new char[size+1];
00165         memcpy(textBuf, org.textBuf, size+1);
00166         text = textBuf;
00167         textBufEnd = textBuf + size;
00168     } else {
00169         textBuf = 0;
00170         size = 0;
00171         textBufEnd = 0;
00172     }
00173 }
00174 
00175 
00177 void EasyScanner::setText(const char* text, int len)
00178 {
00179     if(textBuf) delete[] textBuf;
00180 
00181     size = len;
00182     textBuf = new char[size+1];
00183     memcpy(textBuf, text, len);
00184     textBuf[size] = 0;
00185     this->text = textBuf;
00186     textBufEnd = textBuf + size;
00187     lineNumber = lineNumberOffset;
00188     filename = "";
00189 }
00190 
00191 
00192 EasyScanner::~EasyScanner()
00193 {
00194     if(textBuf) delete[] textBuf;
00195 }
00196 
00197 
00198 void EasyScanner::setLineNumberOffset(int offset)
00199 {
00200     lineNumberOffset = offset;
00201 }
00202 
00203 
00204 void EasyScanner::moveToHead()
00205 {
00206     text = textBuf;
00207     lineNumber = lineNumberOffset;
00208 }
00209 
00210 
00211 void EasyScanner::putSymbols()
00212 {
00213     SymbolMap::iterator p = symbols->begin();
00214     while(p != symbols->end()){
00215         cout << p->first << " = " << p->second << std::endl;
00216         p++;
00217     }
00218 }
00219 
00220 
00221 void EasyScanner::throwException(const char* message)
00222 {
00223     Exception ex;
00224     ex.message = message ? message : defaultErrorMessage;
00225     ex.filename = filename;
00226     ex.lineNumber = lineNumber;
00227     throw ex;
00228 }
00229 
00230 
00231 void EasyScanner::throwException(const std::string& message)
00232 {
00233     throwException(message.c_str());
00234 }
00235 
00236 
00241 void EasyScanner::setCommentChar(char cc)
00242 {
00243     commentChar = cc ? cc : 0xffff;
00244 }
00245 
00246 
00247 void EasyScanner::setLineOriented(bool on)
00248 {
00249     isLineOriented = on;
00250 }
00251 
00252 
00254 void EasyScanner::setWhiteSpaceChar(char ws)
00255 {
00256     whiteSpaceChars.push_back(ws);
00257 }
00258 
00259 
00264 void EasyScanner::setQuoteChar(char qs)
00265 {
00266     quoteChar = qs;
00267 }
00268 
00269 
00274 void EasyScanner::loadFile(const string& filename)
00275 {
00276     this->filename.clear();
00277     
00278     FILE* file = fopen(filename.c_str(), "rb");
00279 
00280     if(!file){
00281         this->lineNumber = -1;
00282         string message;
00283         switch(errno){
00284         case ENOENT:
00285             message = filename + " cannot be found.";
00286             break;
00287         default:
00288             message = string("I/O error in accessing ") + filename;
00289             break;
00290         }
00291         throwException(message.c_str());
00292     }
00293 
00294     this->filename = filename;
00295 
00296     fseek(file, 0, SEEK_END);
00297     size = ftell(file);
00298     rewind(file);
00299     if(textBuf) delete[] textBuf;
00300     textBuf = new char[size+1];
00301     fread(textBuf, sizeof(char), size, file);
00302     textBuf[size] = 0;
00303     fclose(file);
00304     text = textBuf;
00305     textBufEnd = textBuf + size;
00306     lineNumber = lineNumberOffset;
00307 }
00308 
00309 
00313 inline void EasyScanner::skipToLineEnd()
00314 {
00315     while(*text != '\r' && *text != '\n' && *text != '\0') text++;
00316 }
00317 
00318 
00319 void EasyScanner::skipSpace()
00320 {
00321     int n = whiteSpaceChars.size();
00322     while(true){
00323         int i=0;
00324         while(i < n){
00325             if(*text == whiteSpaceChars[i]){
00326                 text++;
00327                 i = 0;
00328             } else {
00329                 i++;
00330             }
00331         }
00332         if(*text == commentChar){
00333             text++;
00334             skipToLineEnd();
00335         }
00336         
00337         if(isLineOriented){
00338             break;
00339         }
00340         if(*text == '\n'){
00341             text++;
00342         } else if(*text == '\r'){
00343             text++;
00344             if(*text == '\n'){
00345                 text++;
00346             }
00347         } else {
00348             break;
00349         }
00350         lineNumber++;
00351     }
00352 }
00353 
00354 
00359 bool EasyScanner::readLF0()
00360 {
00361     if(*text == '\n'){
00362         text++;
00363         lineNumber++;
00364         return true;
00365     } else if(*text == '\r'){
00366         text++;
00367         if(*text == '\n'){
00368             text++;
00369         }
00370         lineNumber++;
00371         return true;
00372     }
00373     return false;
00374 }
00375 
00376 
00377 bool EasyScanner::checkLF()
00378 {
00379     char* current = text;
00380     if(readLF()){
00381         text = current;
00382         return true;
00383     }
00384     return false;
00385 }
00386 
00387 
00388 int EasyScanner::readToken()
00389 {
00390     skipSpace();
00391 
00392     if(isdigit((unsigned char)*text) || *text == '+' || *text == '-'){
00393         char* tail;
00394         intValue = strtol(text, &tail, 0);
00395         if(tail != text){
00396             text = tail;
00397             return T_INTEGER;
00398         }
00399         doubleValue = mystrtod(text, &tail);
00400         if(tail != text){
00401             text = tail;
00402             return T_DOUBLE;
00403         }
00404         charValue = *text;
00405         text++;
00406         return T_SIGLUM;
00407 
00408     } else if(isalpha((unsigned char)*text)){
00409         char* org = text;
00410         text++;
00411         while(isalnum((unsigned char)*text) || *text == '_') text++;
00412         stringValue.assign(org, text - org);
00413         if(stringValue.size() == 1){
00414             charValue = *org;
00415             return T_ALPHABET;
00416         } else {
00417             return T_WORD;
00418         }
00419 
00420     } else if(*text == quoteChar) {
00421         return extractQuotedString() ? T_STRING : T_SIGLUM;
00422 
00423     } else if(ispunct((unsigned char)*text)){
00424         charValue = *text;
00425         text++;
00426         return T_SIGLUM;
00427 
00428     } else if(readLF0()){
00429         return T_LF;
00430 
00431     } else if(*text == '\0'){
00432         return T_EOF;
00433     }
00434 
00435     return T_NONE;
00436 }
00437 
00438 
00439 
00440 
00441 
00445 void EasyScanner::toLower()
00446 {
00447     for(size_t i=0; i < stringValue.size(); ++i){
00448         stringValue[i] = tolower(stringValue[i]);
00449     }
00450 }
00451 
00452 
00453 int EasyScanner::extractQuotedString()
00454 {
00455     text++;
00456     char* org = text;
00457 
00458     if(isLineOriented){
00459         while(true){
00460             if(*text == '\r' || *text == '\n' || *text == '\0'){
00461                 text = org;
00462                 return false;
00463             }
00464             if(*text == quoteChar) break;
00465             text++;
00466         }
00467     } else {
00468         while(true){
00469             if(*text == '\0'){
00470                 text = org;
00471                 return false;
00472             }
00473             readLF0();
00474             if(*text == quoteChar) break;
00475             text++;
00476         }
00477     }
00478 
00479     stringValue.assign(org, text - org);
00480     text++;
00481     return true;
00482 }
00483 
00484 
00485 bool EasyScanner::readDouble()
00486 {
00487     char* tail;
00488 
00489     if(checkLF()) return false;
00490 
00491     doubleValue = mystrtod(text, &tail);
00492 
00493     if(tail != text){
00494         text = tail;
00495         return true;
00496     }
00497 
00498     return false;
00499 }
00500 
00501 bool EasyScanner::readInt()
00502 {
00503     char* tail;
00504 
00505     if(checkLF()) return false;
00506 
00507     intValue = strtol(text, &tail, 0);
00508     if(tail != text){
00509         text = tail;
00510         return true;
00511     }
00512 
00513     return false;
00514 }
00515 
00516 
00517 bool EasyScanner::readChar()
00518 {
00519     skipSpace();
00520 
00521     if(isgraph((unsigned char)*text)){
00522         charValue = *text;
00523         text++;
00524         return true;
00525     }
00526 
00527     return false;
00528 }
00529 
00530 
00531 bool EasyScanner::readChar(int chara)
00532 {
00533     skipSpace();
00534 
00535     if(*text == chara){
00536         text++;
00537         return true;
00538     }
00539 
00540     return false;
00541 }
00542 
00543 int EasyScanner::peekChar()
00544 {
00545     skipSpace();
00546 
00547     return *text;
00548 }
00549 
00550 
00551 bool EasyScanner::readWord0()
00552 {
00553     char* org = text;
00554 
00555     while(true){
00556         int c = (unsigned char)*text;
00557         if(!isalnum(c) && isascii(c) && c != '_'){
00558             break;
00559         }
00560         text++;
00561     }
00562 
00563     if(text - org > 0){
00564         stringValue.assign(org, text - org);
00565         return true;
00566     }
00567 
00568     return false;
00569 }
00570 
00571 
00572 bool EasyScanner::readString0(const int delimiterChar)
00573 {
00574     char* org = text;
00575 
00576     while(true){
00577         int c = (unsigned char)*text;
00578         if(isspace(c) || iscntrl(c) || c == delimiterChar){
00579             break;
00580         }
00581         text++;
00582     }
00583 
00584     if(text - org > 0){
00585         stringValue.assign(org, text - org);
00586         return true;
00587     }
00588 
00589     return false;
00590 }
00591 
00592 
00593 bool EasyScanner::readString(const char* str)
00594 {
00595     skipSpace();
00596 
00597     char* org = text;
00598     while(*str != '\0'){
00599         if(*str++ != *text++){
00600             text = org;
00601             return false;
00602         }
00603     }
00604 
00605     return true;
00606 }
00607 
00608 
00613 bool EasyScanner::readQuotedString(bool allowNoQuotedWord)
00614 {
00615     skipSpace();
00616 
00617     if(*text == quoteChar){
00618         return extractQuotedString();
00619 
00620     } else if(allowNoQuotedWord){
00621         return readString0(' ');
00622     }
00623 
00624     return false;
00625 }
00626 
00627 
00628 bool EasyScanner::readUnquotedTextBlock()
00629 {
00630     skipSpace();
00631 
00632     char* org = text;
00633     while(true){
00634         if(*text == '\r' || *text == '\n' || *text == commentChar || *text == '\0'){
00635             break;
00636         }
00637         text++;
00638     }
00639 
00640     if(text != org){
00641         stringValue.assign(org, text - org);
00642         return true;
00643     }
00644     return false;
00645 }
00646 
00647 
00648 
00649 bool EasyScanner::readSymbol()
00650 {
00651     if(readWord()){
00652         symbolValue = getSymbolID(stringValue);
00653         if(symbolValue){
00654             return true;
00655         }
00656     }
00657 
00658     return false;
00659 }
00660 
00661 
00662 bool EasyScanner::readSymbol(int id)
00663 {
00664     char* org = text;
00665     int orglineNumber = lineNumber;
00666 
00667     if(readWord()){
00668         symbolValue = getSymbolID(stringValue);
00669         if(symbolValue == id){
00670             return true;
00671         } else {
00672             text = org;
00673             lineNumber = orglineNumber;
00674         }
00675     }
00676 
00677     return false;
00678 }
00679 
00680 
00681 
00682 bool EasyScanner::skipLine()
00683 {
00684     while(true){
00685         if(readLF0()){
00686             return true;
00687         }
00688         if(*text == '\0'){
00689             return false;
00690         }
00691         text++;
00692     }
00693 }
00694 
00695 
00696 bool EasyScanner::readLine()
00697 {
00698     char* org = text;
00699 
00700     if(skipLine()){
00701         // eliminate newline code
00702         char* end = text - 1;
00703         if(*end == '\n'){
00704             end--;
00705             if(*end == '\r'){
00706                 end--;
00707             }
00708         }
00709         end++;
00710 
00711         stringValue.assign(org, end - org);
00712         return true;
00713     }
00714 
00715     return false;
00716 }
00717 
00718 
00719 bool EasyScanner::skipBlankLines()
00720 {
00721     do {
00722         if(*text == '\0'){
00723             return false;
00724         }
00725     } while(readLF());
00726 
00727     return true;
00728 }
00729 
00730 
00731 // operators
00732 
00733 EasyScanner& operator>>(EasyScanner& scanner, double& value)
00734 {
00735     if(!scanner.readDouble()){
00736         scanner.throwException("scan error: can't read double value");
00737     }
00738     value = scanner.doubleValue;
00739     return scanner;
00740 }
00741 
00742 
00743 EasyScanner& operator>>(EasyScanner& scanner, int& value)
00744 {
00745     if(!scanner.readInt()){
00746         scanner.throwException("scan error: can't read int value");
00747         throw scanner;
00748     }
00749     value = scanner.intValue;
00750     return scanner;
00751 }
00752 
00753 
00754 EasyScanner& operator>>(EasyScanner& scanner, const char* matchString)
00755 {
00756     scanner.skipSpace();
00757     while(*matchString != '\0'){
00758         if(*scanner.text++ != *matchString++){
00759             scanner.throwException("scan error: unmatched string");
00760         }
00761     }
00762     return scanner;
00763 }
00764 
00765 
00766 EasyScanner& operator>>(EasyScanner& scanner, char matchChar)
00767 {
00768     scanner.skipSpace();
00769     if(*scanner.text++ != matchChar){
00770         scanner.throwException("scan error: unmatched cahracter");
00771     }
00772     return scanner;
00773 }
00774 
00775 
00776 EasyScanner& operator>>(EasyScanner& scanner, string& str)
00777 {
00778     scanner.skipSpace();
00779     if(!scanner.readQuotedString(true)){
00780         scanner.throwException("scan error: can't read string");
00781     }
00782     str = scanner.stringValue;
00783     return scanner;
00784 }
00785 
00786 
00787 EasyScanner& operator>>(EasyScanner& scanner, EasyScanner::Endl endl)
00788 {
00789     if(!scanner.readLF()){
00790         scanner.throwException("scan error: end of line unmatched");
00791     }
00792     return scanner;
00793 }


openhrp3
Author(s): AIST, General Robotix Inc., Nakamura Lab of Dept. of Mechano Informatics at University of Tokyo
autogenerated on Sun Apr 2 2017 03:43:53