scanner.cpp
Go to the documentation of this file.
00001 #include "scanner.h"
00002 #include "token.h"
00003 #include "yaml-cpp-pm/exceptions.h"
00004 #include "exp.h"
00005 #include <cassert>
00006 #include <memory>
00007 
00008 namespace YAML_PM
00009 {
00010         Scanner::Scanner(std::istream& in)  // builds a scanner whose INPUT is initialized from `in`
00011                 : INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false)
00012         {  // all four state flags start out false; no work beyond the initializer list
00013         }
00014 
00015         Scanner::~Scanner()  // empty body: nothing is released explicitly here
00016         {
00017         }
00018 
00019         // empty: scans ahead, then reports whether the token queue has run dry
00020         bool Scanner::empty()
00021         {
00022                 EnsureTokensInQueue();
00023                 const bool exhausted = m_tokens.empty();
00024                 return exhausted;
00025         }
00026 
00027         // pop: drops the token at the front of the queue; a no-op when none is left
00028         void Scanner::pop()
00029         {
00030                 EnsureTokensInQueue();
00031                 if(m_tokens.empty())
00032                         return;  // nothing to drop
00033                 m_tokens.pop();
00034         }
00035 
00036         // peek
00037         // . Returns (but does not remove) the next token on the queue; the queue must not be empty (see empty()).
00038         Token& Scanner::peek()
00039         {
00040                 EnsureTokensInQueue();
00041                 assert(!m_tokens.empty());  // should we be asserting here? I mean, we really should just be checking
00042                                             // if it's empty before peeking (the assert is the only guard for front() below).
00043                 // disabled debug aid: would print each newly exposed front token to std::cerr
00044 #if 0
00045                 static Token *pLast = 0;
00046                 if(pLast != &m_tokens.front())
00047                         std::cerr << "peek: " << m_tokens.front() << "\n";
00048                 pLast = &m_tokens.front();
00049 #endif
00050 
00051                 return m_tokens.front();
00052         }
00053 
00054         // EnsureTokensInQueue
00055         // . Scans ahead until the front of the queue holds a VALID token,
00056         //   or until the stream has ended and nothing more can be produced.
00057         // . INVALID tokens found at the front are dropped along the way.
00058         void Scanner::EnsureTokensInQueue()
00059         {
00060                 for(;;) {
00061                         // throw away the tokens at the front that turned out
00062                         // to be impossible
00063                         while(!m_tokens.empty() && m_tokens.front().status == Token::INVALID)
00064                                 m_tokens.pop();
00065 
00066                         // a valid token at the front is all the caller needs
00067                         const bool haveFront = !m_tokens.empty();
00068                         if(haveFront && m_tokens.front().status == Token::VALID)
00069                                 return;
00070 
00071                         // getting here means the queue is empty, or its front
00072                         // token is still unverified; either way more input has
00073                         // to be scanned before anything can be handed out...
00074 
00075                         // ...unless the stream is already over
00076                         if(m_endedStream)
00077                                 return;
00078 
00079                         // scan some more (this may queue new tokens), then go
00080                         // around and look again
00081                         ScanNextToken();
00082                 }
00083         }
00084 
00085         // ScanNextToken
00086         // . The main scanning function: skips ahead to the next token-like input, ends
00087         //   finished blocks, then branches on the next few characters (first match wins).
00088         void Scanner::ScanNextToken()
00089         {
00090                 if(m_endedStream)
00091                         return;
00092                 // first call: only set the stream up (StartStream queues no token)
00093                 if(!m_startedStream)
00094                         return StartStream();
00095 
00096                 // get rid of whitespace, etc. (in between tokens it should be irrelevant)
00097                 ScanToNextToken();
00098 
00099                 // maybe need to end some blocks (does nothing in flow context)
00100                 PopIndentToHere();
00101 
00102                 // *****
00103                 // And now branch based on the next few characters!
00104                 // *****
00105                 
00106                 // end of stream
00107                 if(!INPUT)
00108                         return EndStream();
00109                 // directive (only recognized at column 0)
00110                 if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive)
00111                         return ScanDirective();
00112 
00113                 // document start/end tokens (only recognized at column 0)
00114                 if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT))
00115                         return ScanDocStart();
00116 
00117                 if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT))
00118                         return ScanDocEnd();
00119 
00120                 // flow start/end/entry
00121                 if(INPUT.peek() == Keys::FlowSeqStart || INPUT.peek() == Keys::FlowMapStart)
00122                         return ScanFlowStart();
00123 
00124                 if(INPUT.peek() == Keys::FlowSeqEnd || INPUT.peek() == Keys::FlowMapEnd)
00125                         return ScanFlowEnd();
00126         
00127                 if(INPUT.peek() == Keys::FlowEntry)
00128                         return ScanFlowEntry();
00129 
00130                 // block entry, key and value (the key/value regex depends on block vs. flow context)
00131                 if(Exp::BlockEntry().Matches(INPUT))
00132                         return ScanBlockEntry();
00133 
00134                 if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT))
00135                         return ScanKey();
00136 
00137                 if(GetValueRegex().Matches(INPUT))
00138                         return ScanValue();
00139 
00140                 // alias/anchor
00141                 if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
00142                         return ScanAnchorOrAlias();
00143 
00144                 // tag
00145                 if(INPUT.peek() == Keys::Tag)
00146                         return ScanTag();
00147 
00148                 // special scalars: literal/folded (block context only), then quoted
00149                 if(InBlockContext() && (INPUT.peek() == Keys::LiteralScalar || INPUT.peek() == Keys::FoldedScalar))
00150                         return ScanBlockScalar();
00151 
00152                 if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
00153                         return ScanQuotedScalar();
00154 
00155                 // plain scalars (regex again depends on block vs. flow context)
00156                 if((InBlockContext() ? Exp::PlainScalar() : Exp::PlainScalarInFlow()).Matches(INPUT))
00157                         return ScanPlainScalar();
00158 
00159                 // don't know what it is! (none of the branches above matched)
00160                 throw ParserException(INPUT.mark(), ErrorMsg::UNKNOWN_TOKEN);
00161         }
00162 
00163         // ScanToNextToken
00164         // . Eats input (whitespace, comments, line breaks) until we reach the next token-like thing.
00165         void Scanner::ScanToNextToken()
00166         {
00167                 while(1) {
00168                         // first eat whitespace (spaces and tabs, see IsWhitespaceToBeEaten)
00169                         while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
00170                                 if(InBlockContext() && Exp::Tab().Matches(INPUT))
00171                                         m_simpleKeyAllowed = false;  // a tab in block context rules out a simple key
00172                                 INPUT.eat(1);
00173                         }
00174 
00175                         // then eat a comment
00176                         if(Exp::Comment().Matches(INPUT)) {
00177                                 // eat until line break (the break itself is handled below)
00178                                 while(INPUT && !Exp::Break().Matches(INPUT))
00179                                         INPUT.eat(1);
00180                         }
00181 
00182                         // if it's NOT a line break, then we're done!
00183                         if(!Exp::Break().Matches(INPUT))
00184                                 break;
00185 
00186                         // otherwise, let's eat the line break and keep going
00187                         int n = Exp::Break().Match(INPUT);  // number of characters to eat for this break
00188                         INPUT.eat(n);
00189 
00190                         // oh yeah, and let's get rid of that simple key
00191                         InvalidateSimpleKey();
00192 
00193                         // new line - we may be able to accept a simple key now
00194                         if(InBlockContext())
00195                                 m_simpleKeyAllowed = true;
00196         }  // end of while(1)
00197         }
00198 
00200         // Misc. helpers
00201 
00202         // IsWhitespaceToBeEaten
00203         // . A character can be skipped between tokens when it is a space or a tab.
00204         // . Note: an earlier rule refused to eat tabs in block context
00205         //         "where a simple key could be allowed
00206         //         (i.e., not at the beginning of a line, or following '-', '?', or ':')"
00207         //   That looks wrong: a tab may be non-content whitespace; it merely cannot
00208         //   contribute to indentation, so once a tab has been seen on a line no
00209         //   simple key can start there.
00210         bool Scanner::IsWhitespaceToBeEaten(char ch)
00211         {
00212                 switch(ch) {
00213                         case ' ':
00214                         case '\t':
00215                                 return true;
00216                         default:
00217                                 return false;
00218                 }
00219         }
00220 
00221         // GetValueRegex
00222         // . Picks the regex used to recognize a value token in the current context.
00223         const RegEx& Scanner::GetValueRegex() const
00224         {
00225                 if(!InBlockContext())
00226                         return m_canBeJSONFlow ? Exp::ValueInJSONFlow() : Exp::ValueInFlow();
00227 
00228                 return Exp::Value();
00229         }
00230 
00231         // StartStream: sets the initial scanner state and installs the base indent
00232         void Scanner::StartStream()
00233         {
00234                 m_simpleKeyAllowed = true;
00235                 m_startedStream = true;
00236                 // the bottom of the indent stack: column -1, type NONE
00237                 std::auto_ptr<IndentMarker> pBase(new IndentMarker(-1, IndentMarker::NONE));
00238                 m_indentRefs.push_back(pBase);
00239                 m_indents.push(&m_indentRefs.back());
00240         }
00241 
00242         // EndStream
00243         // . Winds the stream down: pops all indents and simple keys, then flags the end.
00244         void Scanner::EndStream()
00245         {
00246                 // force a newline when the input stopped in the middle of a line
00247                 const bool midLine = INPUT.column() > 0;
00248                 if(midLine)
00249                         INPUT.ResetColumn();
00250 
00251                 PopAllIndents();
00252                 PopAllSimpleKeys();
00253                 m_endedStream = true;
00254                 m_simpleKeyAllowed = false;
00255         }
00256 
00257         Token *Scanner::PushToken(Token::TYPE type)  // enqueues a token of `type` stamped with the current input mark
00258         {
00259                 m_tokens.push(Token(type, INPUT.mark()));
00260                 return &m_tokens.back();  // address of the token that was just queued
00261         }
00262 
00263         Token::TYPE Scanner::GetStartTokenFor(IndentMarker::INDENT_TYPE type) const
00264         {
00265                 if(type == IndentMarker::SEQ)
00266                         return Token::BLOCK_SEQ_START;
00267                 if(type == IndentMarker::MAP)
00268                         return Token::BLOCK_MAP_START;
00269 
00270                 assert(false);  // NONE (or anything else) has no start token
00271                 throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
00272         }
00273 
00274         // PushIndentTo
00275         // . Pushes an indentation onto the stack, and enqueues the
00276         //   proper token (sequence start or mapping start).
00277         // . Returns the indent marker it generates, or 0 when nothing is pushed.
00278         Scanner::IndentMarker *Scanner::PushIndentTo(int column, IndentMarker::INDENT_TYPE type)
00279         {
00280                 // are we in flow? (then no indent is pushed)
00281                 if(InFlowContext())
00282                         return 0;
00283                 
00284                 std::auto_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
00285                 IndentMarker& indent = *pIndent;
00286                 const IndentMarker& lastIndent = *m_indents.top();  // assumes a non-empty stack (StartStream pushes a base marker)
00287 
00288                 // is this actually an indentation? (an equal column only counts for a SEQ on top of a MAP)
00289                 if(indent.column < lastIndent.column)
00290                         return 0;
00291                 if(indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP))
00292                         return 0;
00293 
00294                 // push a start token (and remember it in the marker)
00295                 indent.pStartToken = PushToken(GetStartTokenFor(type));
00296 
00297                 // and then the indent (the stack gets a raw pointer; pIndent itself is handed to m_indentRefs)
00298                 m_indents.push(&indent);
00299                 m_indentRefs.push_back(pIndent);
00300                 return &m_indentRefs.back();
00301         }
00302 
00303         // PopIndentToHere
00304         // . Pops indentations off the stack until we reach the current indentation level,
00305         //   and enqueues the proper token each time.
00306         // . Then pops all invalid indentations off.
00307         void Scanner::PopIndentToHere()
00308         {
00309                 // are we in flow? (then nothing is popped)
00310                 if(InFlowContext())
00311                         return;
00312 
00313                 // now pop away (at an equal column, only a SEQ indent is popped, and only if the input is not a block entry)
00314                 while(!m_indents.empty()) {
00315                         const IndentMarker& indent = *m_indents.top();
00316                         if(indent.column < INPUT.column())
00317                                 break;
00318                         if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT)))
00319                                 break;
00320                                 
00321                         PopIndent();
00322                 }
00323                 // then drop the INVALID indents left on top of the stack
00324                 while(!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID)
00325                         PopIndent();
00326         }
00327         
00328         // PopAllIndents
00329         // . Unwinds the indent stack down to (but not including) the base
00330         //   marker of type NONE, calling PopIndent for each indent removed.
00331         // . Does nothing at all in flow context.
00332         void Scanner::PopAllIndents()
00333         {
00334                 // block indents are left alone while in flow context
00335                 if(InFlowContext())
00336                         return;
00337 
00338                 // keep popping until the stack runs dry or the marker of
00339                 // type NONE is on top
00340                 for(;;) {
00341                         if(m_indents.empty() || m_indents.top()->type == IndentMarker::NONE)
00342                                 return;
00343                         PopIndent();
00344                 }
00345         }
00346         
00347         // PopIndent
00348         // . Takes the top indent off the stack; a VALID sequence/map indent gets its end token queued.
00349         void Scanner::PopIndent()
00350         {
00351                 const IndentMarker& closing = *m_indents.top();
00352                 m_indents.pop();  // only the stack entry goes; `closing` is still read below
00353                 const bool wasValid = (closing.status == IndentMarker::VALID);
00354                 if(!wasValid) {
00355                         InvalidateSimpleKey();
00356                         return;
00357                 }
00358 
00359                 if(closing.type == IndentMarker::SEQ)
00360                         PushToken(Token::BLOCK_SEQ_END);
00361                 else if(closing.type == IndentMarker::MAP)
00362                         PushToken(Token::BLOCK_MAP_END);
00363         }
00364 
00365         // GetTopIndent
00366         // . Column of the indent on top of the stack, or 0 when the stack is empty.
00367         int Scanner::GetTopIndent() const
00368         {
00369                 const bool noIndents = m_indents.empty();
00370                 return noIndents ? 0 : m_indents.top()->column;
00371         }
00372 
00373         // ThrowParserException
00374         // . Raises a ParserException carrying `msg`, positioned at the front
00375         //   token of the queue, or at the null mark when the queue is empty.
00376         // . Only inspects the queue; no further scanning takes place.
00377         void Scanner::ThrowParserException(const std::string& msg) const
00378         {
00379                 if(m_tokens.empty())
00380                         throw ParserException(Mark::null(), msg);
00381 
00382                 // copy the mark out of the queued token before throwing
00383                 const Mark where = m_tokens.front().mark;
00384                 throw ParserException(where, msg);
00385         }
00386 }
00387 


upstream_src
Author(s):
autogenerated on Mon Oct 6 2014 10:27:42