Go to the documentation of this file.00001
00006 #include <iostream>
00007 #include <cstdio>
00008 #include <cctype>
00009 #include <cstdlib>
00010 #include <cmath>
00011 #include <cstring>
00012 #include <boost/format.hpp>
00013 #include <errno.h>
00014
00015 #include "EasyScanner.h"
00016
00017 using namespace std;
00018 using namespace boost;
00019 using namespace hrp;
00020
00021
00022
00023
00024
00025 #ifdef _MSC_VER
/**
   Minimal replacement for strtod() used in MSVC builds.

   Accepted syntax: an optional sign, decimal digits with an optional
   fractional part (digits may appear on either side of the '.', but at least
   one digit is required) and an optional decimal exponent. Leading white
   space, hexadecimal notation, "inf" and "nan" are not handled.

   As with strtod(), the longest valid prefix is consumed: "5." yields 5.0 and
   "1.5e" yields 1.5 with the 'e' left unread.

   @param nptr    NUL-terminated text to parse.
   @param endptr  If not null, receives the position following the last
                  consumed character, or nptr when no number could be parsed.
   @return The parsed value, or 0.0 when no number could be parsed.
*/
static double mystrtod(const char* nptr, char** endptr)
{
    const char* cur = nptr;
    double sign = +1.0;

    if(*cur == '+'){
        ++cur;
    } else if(*cur == '-'){
        sign = -1.0;
        ++cur;
    }

    double value = 0.0;
    bool hasDigits = false;

    // integer part
    while(isdigit((unsigned char)*cur)){
        value = value * 10.0 + (*cur - '0');
        hasDigits = true;
        ++cur;
    }

    // fractional part
    if(*cur == '.'){
        const char* frac = cur + 1;
        double scale = 0.1;
        while(isdigit((unsigned char)*frac)){
            value += scale * (*frac - '0');
            scale *= 0.1;
            ++frac;
        }
        // The '.' belongs to the number only if a digit exists on at least
        // one side of it; a lone "." (or "+.") is not a number.
        if(hasDigits || frac != cur + 1){
            hasDigits = true;
            cur = frac;
        }
    }

    if(!hasDigits){
        if(endptr){
            *endptr = (char*)nptr;
        }
        return 0.0;
    }

    // exponent part: consumed only when at least one digit follows, otherwise
    // the 'e' is left for the caller and the mantissa alone is the result
    if(*cur == 'e' || *cur == 'E'){
        const char* ex = cur + 1;
        double esign = +1.0;
        if(*ex == '+'){
            ++ex;
        } else if(*ex == '-'){
            esign = -1.0;
            ++ex;
        }
        if(isdigit((unsigned char)*ex)){
            double p = 0.0;
            do {
                p = p * 10.0 + (*ex - '0');
                ++ex;
            } while(isdigit((unsigned char)*ex));
            value *= pow(10.0, esign * p);
            cur = ex;
        }
    }

    if(endptr){
        *endptr = (char*)cur;
    }
    return sign * value;
}
00086 #else
/**
   On non-MSVC builds number parsing is delegated to the C library's strtod().

   @param nptr    text to parse
   @param endptr  receives the position where parsing stopped
   @return the value reported by strtod()
*/
static inline double mystrtod(const char* nptr, char** endptr)
{
    const double parsed = strtod(nptr, endptr);
    return parsed;
}
00090 #endif
00091
00092
00093 std::string EasyScanner::Exception::getFullMessage()
00094 {
00095 string m(message);
00096
00097 if(lineNumber > 0){
00098 m += str(format(" at line %1%") % lineNumber);
00099 }
00100
00101 if(!filename.empty()){
00102 m += str(format(" of %1%") % filename);
00103 }
00104
00105 return m;
00106 }
00107
00108
00109 EasyScanner::EasyScanner()
00110 {
00111 init();
00112 }
00113
00114
00118 EasyScanner::EasyScanner(string filename)
00119 {
00120 init();
00121 loadFile(filename);
00122 }
00123
00124
00125 void EasyScanner::init()
00126 {
00127 textBuf = 0;
00128 size = 0;
00129 textBufEnd = 0;
00130 lineNumberOffset = 1;
00131
00132 commentChar = '#';
00133 quoteChar = 0xffff;
00134 isLineOriented = true;
00135 defaultErrorMessage = "unknown error of the lexical scanner";
00136
00137 whiteSpaceChars.push_back(' ');
00138 whiteSpaceChars.push_back('\t');
00139
00140 symbols.reset(new SymbolMap());
00141 }
00142
00143
00149 EasyScanner::EasyScanner(const EasyScanner& org, bool copyText) :
00150 whiteSpaceChars(org.whiteSpaceChars)
00151 {
00152 commentChar = org.commentChar;
00153 quoteChar = org.quoteChar;
00154 isLineOriented = org.isLineOriented;
00155 filename = org.filename;
00156 defaultErrorMessage = org.defaultErrorMessage;
00157 lineNumber = org.lineNumber;
00158 lineNumberOffset = org.lineNumberOffset;
00159
00160 symbols = org.symbols;
00161
00162 if(copyText && org.textBuf){
00163 size = org.size;
00164 textBuf = new char[size+1];
00165 memcpy(textBuf, org.textBuf, size+1);
00166 text = textBuf;
00167 textBufEnd = textBuf + size;
00168 } else {
00169 textBuf = 0;
00170 size = 0;
00171 textBufEnd = 0;
00172 }
00173 }
00174
00175
00177 void EasyScanner::setText(const char* text, int len)
00178 {
00179 if(textBuf) delete[] textBuf;
00180
00181 size = len;
00182 textBuf = new char[size+1];
00183 memcpy(textBuf, text, len);
00184 textBuf[size] = 0;
00185 this->text = textBuf;
00186 textBufEnd = textBuf + size;
00187 lineNumber = lineNumberOffset;
00188 filename = "";
00189 }
00190
00191
00192 EasyScanner::~EasyScanner()
00193 {
00194 if(textBuf) delete[] textBuf;
00195 }
00196
00197
00198 void EasyScanner::setLineNumberOffset(int offset)
00199 {
00200 lineNumberOffset = offset;
00201 }
00202
00203
00204 void EasyScanner::moveToHead()
00205 {
00206 text = textBuf;
00207 lineNumber = lineNumberOffset;
00208 }
00209
00210
00211 void EasyScanner::putSymbols()
00212 {
00213 SymbolMap::iterator p = symbols->begin();
00214 while(p != symbols->end()){
00215 cout << p->first << " = " << p->second << std::endl;
00216 p++;
00217 }
00218 }
00219
00220
00221 void EasyScanner::throwException(const char* message)
00222 {
00223 Exception ex;
00224 ex.message = message ? message : defaultErrorMessage;
00225 ex.filename = filename;
00226 ex.lineNumber = lineNumber;
00227 throw ex;
00228 }
00229
00230
00231 void EasyScanner::throwException(const std::string& message)
00232 {
00233 throwException(message.c_str());
00234 }
00235
00236
00241 void EasyScanner::setCommentChar(char cc)
00242 {
00243 commentChar = cc ? cc : 0xffff;
00244 }
00245
00246
00247 void EasyScanner::setLineOriented(bool on)
00248 {
00249 isLineOriented = on;
00250 }
00251
00252
00254 void EasyScanner::setWhiteSpaceChar(char ws)
00255 {
00256 whiteSpaceChars.push_back(ws);
00257 }
00258
00259
00264 void EasyScanner::setQuoteChar(char qs)
00265 {
00266 quoteChar = qs;
00267 }
00268
00269
00274 void EasyScanner::loadFile(const string& filename)
00275 {
00276 this->filename.clear();
00277
00278 FILE* file = fopen(filename.c_str(), "rb");
00279
00280 if(!file){
00281 this->lineNumber = -1;
00282 string message;
00283 switch(errno){
00284 case ENOENT:
00285 message = filename + " cannot be found.";
00286 break;
00287 default:
00288 message = string("I/O error in accessing ") + filename;
00289 break;
00290 }
00291 throwException(message.c_str());
00292 }
00293
00294 this->filename = filename;
00295
00296 fseek(file, 0, SEEK_END);
00297 size = ftell(file);
00298 rewind(file);
00299 if(textBuf) delete[] textBuf;
00300 textBuf = new char[size+1];
00301 fread(textBuf, sizeof(char), size, file);
00302 textBuf[size] = 0;
00303 fclose(file);
00304 text = textBuf;
00305 textBufEnd = textBuf + size;
00306 lineNumber = lineNumberOffset;
00307 }
00308
00309
00313 inline void EasyScanner::skipToLineEnd()
00314 {
00315 while(*text != '\r' && *text != '\n' && *text != '\0') text++;
00316 }
00317
00318
00319 void EasyScanner::skipSpace()
00320 {
00321 int n = whiteSpaceChars.size();
00322 while(true){
00323 int i=0;
00324 while(i < n){
00325 if(*text == whiteSpaceChars[i]){
00326 text++;
00327 i = 0;
00328 } else {
00329 i++;
00330 }
00331 }
00332 if(*text == commentChar){
00333 text++;
00334 skipToLineEnd();
00335 }
00336
00337 if(isLineOriented){
00338 break;
00339 }
00340 if(*text == '\n'){
00341 text++;
00342 } else if(*text == '\r'){
00343 text++;
00344 if(*text == '\n'){
00345 text++;
00346 }
00347 } else {
00348 break;
00349 }
00350 lineNumber++;
00351 }
00352 }
00353
00354
00359 bool EasyScanner::readLF0()
00360 {
00361 if(*text == '\n'){
00362 text++;
00363 lineNumber++;
00364 return true;
00365 } else if(*text == '\r'){
00366 text++;
00367 if(*text == '\n'){
00368 text++;
00369 }
00370 lineNumber++;
00371 return true;
00372 }
00373 return false;
00374 }
00375
00376
00377 bool EasyScanner::checkLF()
00378 {
00379 char* current = text;
00380 if(readLF()){
00381 text = current;
00382 return true;
00383 }
00384 return false;
00385 }
00386
00387
00388 int EasyScanner::readToken()
00389 {
00390 skipSpace();
00391
00392 if(isdigit((unsigned char)*text) || *text == '+' || *text == '-'){
00393 char* tail;
00394 intValue = strtol(text, &tail, 0);
00395 if(tail != text){
00396 text = tail;
00397 return T_INTEGER;
00398 }
00399 doubleValue = mystrtod(text, &tail);
00400 if(tail != text){
00401 text = tail;
00402 return T_DOUBLE;
00403 }
00404 charValue = *text;
00405 text++;
00406 return T_SIGLUM;
00407
00408 } else if(isalpha((unsigned char)*text)){
00409 char* org = text;
00410 text++;
00411 while(isalnum((unsigned char)*text) || *text == '_') text++;
00412 stringValue.assign(org, text - org);
00413 if(stringValue.size() == 1){
00414 charValue = *org;
00415 return T_ALPHABET;
00416 } else {
00417 return T_WORD;
00418 }
00419
00420 } else if(*text == quoteChar) {
00421 return extractQuotedString() ? T_STRING : T_SIGLUM;
00422
00423 } else if(ispunct((unsigned char)*text)){
00424 charValue = *text;
00425 text++;
00426 return T_SIGLUM;
00427
00428 } else if(readLF0()){
00429 return T_LF;
00430
00431 } else if(*text == '\0'){
00432 return T_EOF;
00433 }
00434
00435 return T_NONE;
00436 }
00437
00438
00439
00440
00441
00445 void EasyScanner::toLower()
00446 {
00447 for(size_t i=0; i < stringValue.size(); ++i){
00448 stringValue[i] = tolower(stringValue[i]);
00449 }
00450 }
00451
00452
00453 int EasyScanner::extractQuotedString()
00454 {
00455 text++;
00456 char* org = text;
00457
00458 if(isLineOriented){
00459 while(true){
00460 if(*text == '\r' || *text == '\n' || *text == '\0'){
00461 text = org;
00462 return false;
00463 }
00464 if(*text == quoteChar) break;
00465 text++;
00466 }
00467 } else {
00468 while(true){
00469 if(*text == '\0'){
00470 text = org;
00471 return false;
00472 }
00473 readLF0();
00474 if(*text == quoteChar) break;
00475 text++;
00476 }
00477 }
00478
00479 stringValue.assign(org, text - org);
00480 text++;
00481 return true;
00482 }
00483
00484
00485 bool EasyScanner::readDouble()
00486 {
00487 char* tail;
00488
00489 if(checkLF()) return false;
00490
00491 doubleValue = mystrtod(text, &tail);
00492
00493 if(tail != text){
00494 text = tail;
00495 return true;
00496 }
00497
00498 return false;
00499 }
00500
00501 bool EasyScanner::readInt()
00502 {
00503 char* tail;
00504
00505 if(checkLF()) return false;
00506
00507 intValue = strtol(text, &tail, 0);
00508 if(tail != text){
00509 text = tail;
00510 return true;
00511 }
00512
00513 return false;
00514 }
00515
00516
00517 bool EasyScanner::readChar()
00518 {
00519 skipSpace();
00520
00521 if(isgraph((unsigned char)*text)){
00522 charValue = *text;
00523 text++;
00524 return true;
00525 }
00526
00527 return false;
00528 }
00529
00530
00531 bool EasyScanner::readChar(int chara)
00532 {
00533 skipSpace();
00534
00535 if(*text == chara){
00536 text++;
00537 return true;
00538 }
00539
00540 return false;
00541 }
00542
00543 int EasyScanner::peekChar()
00544 {
00545 skipSpace();
00546
00547 return *text;
00548 }
00549
00550
00551 bool EasyScanner::readWord0()
00552 {
00553 char* org = text;
00554
00555 while(true){
00556 int c = (unsigned char)*text;
00557 if(!isalnum(c) && isascii(c) && c != '_'){
00558 break;
00559 }
00560 text++;
00561 }
00562
00563 if(text - org > 0){
00564 stringValue.assign(org, text - org);
00565 return true;
00566 }
00567
00568 return false;
00569 }
00570
00571
00572 bool EasyScanner::readString0(const int delimiterChar)
00573 {
00574 char* org = text;
00575
00576 while(true){
00577 int c = (unsigned char)*text;
00578 if(isspace(c) || iscntrl(c) || c == delimiterChar){
00579 break;
00580 }
00581 text++;
00582 }
00583
00584 if(text - org > 0){
00585 stringValue.assign(org, text - org);
00586 return true;
00587 }
00588
00589 return false;
00590 }
00591
00592
00593 bool EasyScanner::readString(const char* str)
00594 {
00595 skipSpace();
00596
00597 char* org = text;
00598 while(*str != '\0'){
00599 if(*str++ != *text++){
00600 text = org;
00601 return false;
00602 }
00603 }
00604
00605 return true;
00606 }
00607
00608
00613 bool EasyScanner::readQuotedString(bool allowNoQuotedWord)
00614 {
00615 skipSpace();
00616
00617 if(*text == quoteChar){
00618 return extractQuotedString();
00619
00620 } else if(allowNoQuotedWord){
00621 return readString0(' ');
00622 }
00623
00624 return false;
00625 }
00626
00627
00628 bool EasyScanner::readUnquotedTextBlock()
00629 {
00630 skipSpace();
00631
00632 char* org = text;
00633 while(true){
00634 if(*text == '\r' || *text == '\n' || *text == commentChar || *text == '\0'){
00635 break;
00636 }
00637 text++;
00638 }
00639
00640 if(text != org){
00641 stringValue.assign(org, text - org);
00642 return true;
00643 }
00644 return false;
00645 }
00646
00647
00648
00649 bool EasyScanner::readSymbol()
00650 {
00651 if(readWord()){
00652 symbolValue = getSymbolID(stringValue);
00653 if(symbolValue){
00654 return true;
00655 }
00656 }
00657
00658 return false;
00659 }
00660
00661
00662 bool EasyScanner::readSymbol(int id)
00663 {
00664 char* org = text;
00665 int orglineNumber = lineNumber;
00666
00667 if(readWord()){
00668 symbolValue = getSymbolID(stringValue);
00669 if(symbolValue == id){
00670 return true;
00671 } else {
00672 text = org;
00673 lineNumber = orglineNumber;
00674 }
00675 }
00676
00677 return false;
00678 }
00679
00680
00681
00682 bool EasyScanner::skipLine()
00683 {
00684 while(true){
00685 if(readLF0()){
00686 return true;
00687 }
00688 if(*text == '\0'){
00689 return false;
00690 }
00691 text++;
00692 }
00693 }
00694
00695
00696 bool EasyScanner::readLine()
00697 {
00698 char* org = text;
00699
00700 if(skipLine()){
00701
00702 char* end = text - 1;
00703 if(*end == '\n'){
00704 end--;
00705 if(*end == '\r'){
00706 end--;
00707 }
00708 }
00709 end++;
00710
00711 stringValue.assign(org, end - org);
00712 return true;
00713 }
00714
00715 return false;
00716 }
00717
00718
00719 bool EasyScanner::skipBlankLines()
00720 {
00721 do {
00722 if(*text == '\0'){
00723 return false;
00724 }
00725 } while(readLF());
00726
00727 return true;
00728 }
00729
00730
00731
00732
00733 EasyScanner& operator>>(EasyScanner& scanner, double& value)
00734 {
00735 if(!scanner.readDouble()){
00736 scanner.throwException("scan error: can't read double value");
00737 }
00738 value = scanner.doubleValue;
00739 return scanner;
00740 }
00741
00742
00743 EasyScanner& operator>>(EasyScanner& scanner, int& value)
00744 {
00745 if(!scanner.readInt()){
00746 scanner.throwException("scan error: can't read int value");
00747 throw scanner;
00748 }
00749 value = scanner.intValue;
00750 return scanner;
00751 }
00752
00753
00754 EasyScanner& operator>>(EasyScanner& scanner, const char* matchString)
00755 {
00756 scanner.skipSpace();
00757 while(*matchString != '\0'){
00758 if(*scanner.text++ != *matchString++){
00759 scanner.throwException("scan error: unmatched string");
00760 }
00761 }
00762 return scanner;
00763 }
00764
00765
00766 EasyScanner& operator>>(EasyScanner& scanner, char matchChar)
00767 {
00768 scanner.skipSpace();
00769 if(*scanner.text++ != matchChar){
00770 scanner.throwException("scan error: unmatched cahracter");
00771 }
00772 return scanner;
00773 }
00774
00775
00776 EasyScanner& operator>>(EasyScanner& scanner, string& str)
00777 {
00778 scanner.skipSpace();
00779 if(!scanner.readQuotedString(true)){
00780 scanner.throwException("scan error: can't read string");
00781 }
00782 str = scanner.stringValue;
00783 return scanner;
00784 }
00785
00786
00787 EasyScanner& operator>>(EasyScanner& scanner, EasyScanner::Endl endl)
00788 {
00789 if(!scanner.readLF()){
00790 scanner.throwException("scan error: end of line unmatched");
00791 }
00792 return scanner;
00793 }