scantoken.cpp
Go to the documentation of this file.
00001 #include "scanner.h"
00002 #include "token.h"
00003 #include "yaml-cpp-pm/exceptions.h"
00004 #include "exp.h"
00005 #include "scanscalar.h"
00006 #include "scantag.h"
00007 #include "tag.h"
00008 #include <sstream>
00009 
00010 namespace YAML_PM
00011 {
00013         // Specialization for scanning specific tokens
00014 
00015         // Directive
00016         // . Note: no semantic checking is done here (that's for the parser to do)
00017         void Scanner::ScanDirective()
00018         {
00019                 std::string name;
00020                 std::vector <std::string> params;
00021 
00022                 // pop indents and simple keys
00023                 PopAllIndents();
00024                 PopAllSimpleKeys();
00025 
00026                 m_simpleKeyAllowed = false;
00027                 m_canBeJSONFlow = false;
00028 
00029                 // store pos and eat indicator
00030                 Token token(Token::DIRECTIVE, INPUT.mark());
00031                 INPUT.eat(1);
00032 
00033                 // read name
00034                 while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
00035                         token.value += INPUT.get();
00036 
00037                 // read parameters
00038                 while(1) {
00039                         // first get rid of whitespace
00040                         while(Exp::Blank().Matches(INPUT))
00041                                 INPUT.eat(1);
00042 
00043                         // break on newline or comment
00044                         if(!INPUT || Exp::Break().Matches(INPUT) || Exp::Comment().Matches(INPUT))
00045                                 break;
00046 
00047                         // now read parameter
00048                         std::string param;
00049                         while(INPUT && !Exp::BlankOrBreak().Matches(INPUT))
00050                                 param += INPUT.get();
00051 
00052                         token.params.push_back(param);
00053                 }
00054                 
00055                 m_tokens.push(token);
00056         }
00057 
00058         // DocStart
00059         void Scanner::ScanDocStart()
00060         {
00061                 PopAllIndents();
00062                 PopAllSimpleKeys();
00063                 m_simpleKeyAllowed = false;
00064                 m_canBeJSONFlow = false;
00065 
00066                 // eat
00067                 Mark mark = INPUT.mark();
00068                 INPUT.eat(3);
00069                 m_tokens.push(Token(Token::DOC_START, mark));
00070         }
00071 
00072         // DocEnd
00073         void Scanner::ScanDocEnd()
00074         {
00075                 PopAllIndents();
00076                 PopAllSimpleKeys();
00077                 m_simpleKeyAllowed = false;
00078                 m_canBeJSONFlow = false;
00079 
00080                 // eat
00081                 Mark mark = INPUT.mark();
00082                 INPUT.eat(3);
00083                 m_tokens.push(Token(Token::DOC_END, mark));
00084         }
00085 
00086         // FlowStart
00087         void Scanner::ScanFlowStart()
00088         {
00089                 // flows can be simple keys
00090                 InsertPotentialSimpleKey();
00091                 m_simpleKeyAllowed = true;
00092                 m_canBeJSONFlow = false;
00093 
00094                 // eat
00095                 Mark mark = INPUT.mark();
00096                 char ch = INPUT.get();
00097                 FLOW_MARKER flowType = (ch == Keys::FlowSeqStart ? FLOW_SEQ : FLOW_MAP);
00098                 m_flows.push(flowType);
00099                 Token::TYPE type = (flowType == FLOW_SEQ ? Token::FLOW_SEQ_START : Token::FLOW_MAP_START);
00100                 m_tokens.push(Token(type, mark));
00101         }
00102 
00103         // FlowEnd
00104         void Scanner::ScanFlowEnd()
00105         {
00106                 if(InBlockContext())
00107                         throw ParserException(INPUT.mark(), ErrorMsg::FLOW_END);
00108 
00109                 // we might have a solo entry in the flow context
00110                 if(InFlowContext()) {
00111                         if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
00112                                 m_tokens.push(Token(Token::VALUE, INPUT.mark()));
00113                         else if(m_flows.top() == FLOW_SEQ)
00114                                 InvalidateSimpleKey();
00115                 }
00116 
00117                 m_simpleKeyAllowed = false;
00118                 m_canBeJSONFlow = true;
00119 
00120                 // eat
00121                 Mark mark = INPUT.mark();
00122                 char ch = INPUT.get();
00123 
00124                 // check that it matches the start
00125                 FLOW_MARKER flowType = (ch == Keys::FlowSeqEnd ? FLOW_SEQ : FLOW_MAP);
00126                 if(m_flows.top() != flowType)
00127                         throw ParserException(mark, ErrorMsg::FLOW_END);
00128                 m_flows.pop();
00129                 
00130                 Token::TYPE type = (flowType ? Token::FLOW_SEQ_END : Token::FLOW_MAP_END);
00131                 m_tokens.push(Token(type, mark));
00132         }
00133 
00134         // FlowEntry
00135         void Scanner::ScanFlowEntry()
00136         {
00137                 // we might have a solo entry in the flow context
00138                 if(InFlowContext()) {
00139                         if(m_flows.top() == FLOW_MAP && VerifySimpleKey())
00140                                 m_tokens.push(Token(Token::VALUE, INPUT.mark()));
00141                         else if(m_flows.top() == FLOW_SEQ)
00142                                 InvalidateSimpleKey();
00143                 }
00144                 
00145                 m_simpleKeyAllowed = true;
00146                 m_canBeJSONFlow = false;
00147 
00148                 // eat
00149                 Mark mark = INPUT.mark();
00150                 INPUT.eat(1);
00151                 m_tokens.push(Token(Token::FLOW_ENTRY, mark));
00152         }
00153 
00154         // BlockEntry
00155         void Scanner::ScanBlockEntry()
00156         {
00157                 // we better be in the block context!
00158                 if(InFlowContext())
00159                         throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
00160 
00161                 // can we put it here?
00162                 if(!m_simpleKeyAllowed)
00163                         throw ParserException(INPUT.mark(), ErrorMsg::BLOCK_ENTRY);
00164 
00165                 PushIndentTo(INPUT.column(), IndentMarker::SEQ);
00166                 m_simpleKeyAllowed = true;
00167                 m_canBeJSONFlow = false;
00168 
00169                 // eat
00170                 Mark mark = INPUT.mark();
00171                 INPUT.eat(1);
00172                 m_tokens.push(Token(Token::BLOCK_ENTRY, mark));
00173         }
00174 
00175         // Key
00176         void Scanner::ScanKey()
00177         {
00178                 // handle keys diffently in the block context (and manage indents)
00179                 if(InBlockContext()) {
00180                         if(!m_simpleKeyAllowed)
00181                                 throw ParserException(INPUT.mark(), ErrorMsg::MAP_KEY);
00182 
00183                         PushIndentTo(INPUT.column(), IndentMarker::MAP);
00184                 }
00185 
00186                 // can only put a simple key here if we're in block context
00187                 m_simpleKeyAllowed = InBlockContext();
00188 
00189                 // eat
00190                 Mark mark = INPUT.mark();
00191                 INPUT.eat(1);
00192                 m_tokens.push(Token(Token::KEY, mark));
00193         }
00194 
00195         // Value
00196         void Scanner::ScanValue()
00197         {
00198                 // and check that simple key
00199                 bool isSimpleKey = VerifySimpleKey();
00200                 m_canBeJSONFlow = false;
00201                 
00202                 if(isSimpleKey) {
00203                         // can't follow a simple key with another simple key (dunno why, though - it seems fine)
00204                         m_simpleKeyAllowed = false;
00205                 } else {
00206                         // handle values diffently in the block context (and manage indents)
00207                         if(InBlockContext()) {
00208                                 if(!m_simpleKeyAllowed)
00209                                         throw ParserException(INPUT.mark(), ErrorMsg::MAP_VALUE);
00210 
00211                                 PushIndentTo(INPUT.column(), IndentMarker::MAP);
00212                         }
00213 
00214                         // can only put a simple key here if we're in block context
00215                         m_simpleKeyAllowed = InBlockContext();
00216                 }
00217 
00218                 // eat
00219                 Mark mark = INPUT.mark();
00220                 INPUT.eat(1);
00221                 m_tokens.push(Token(Token::VALUE, mark));
00222         }
00223 
00224         // AnchorOrAlias
00225         void Scanner::ScanAnchorOrAlias()
00226         {
00227                 bool alias;
00228                 std::string name;
00229 
00230                 // insert a potential simple key
00231                 InsertPotentialSimpleKey();
00232                 m_simpleKeyAllowed = false;
00233                 m_canBeJSONFlow = false;
00234 
00235                 // eat the indicator
00236                 Mark mark = INPUT.mark();
00237                 char indicator = INPUT.get();
00238                 alias = (indicator == Keys::Alias);
00239 
00240                 // now eat the content
00241                 while(INPUT && Exp::Anchor().Matches(INPUT))
00242                         name += INPUT.get();
00243 
00244                 // we need to have read SOMETHING!
00245                 if(name.empty())
00246                         throw ParserException(INPUT.mark(), alias ? ErrorMsg::ALIAS_NOT_FOUND : ErrorMsg::ANCHOR_NOT_FOUND);
00247 
00248                 // and needs to end correctly
00249                 if(INPUT && !Exp::AnchorEnd().Matches(INPUT))
00250                         throw ParserException(INPUT.mark(), alias ? ErrorMsg::CHAR_IN_ALIAS : ErrorMsg::CHAR_IN_ANCHOR);
00251 
00252                 // and we're done
00253                 Token token(alias ? Token::ALIAS : Token::ANCHOR, mark);
00254                 token.value = name;
00255                 m_tokens.push(token);
00256         }
00257 
00258         // Tag
00259         void Scanner::ScanTag()
00260         {
00261                 // insert a potential simple key
00262                 InsertPotentialSimpleKey();
00263                 m_simpleKeyAllowed = false;
00264                 m_canBeJSONFlow = false;
00265 
00266                 Token token(Token::TAG, INPUT.mark());
00267 
00268                 // eat the indicator
00269                 INPUT.get();
00270                 
00271                 if(INPUT && INPUT.peek() == Keys::VerbatimTagStart){
00272                         std::string tag = ScanVerbatimTag(INPUT);
00273 
00274                         token.value = tag;
00275                         token.data = Tag::VERBATIM;
00276                 } else {
00277                         bool canBeHandle;
00278                         token.value = ScanTagHandle(INPUT, canBeHandle);
00279                         if(!canBeHandle && token.value.empty())
00280                                 token.data = Tag::NON_SPECIFIC;
00281                         else if(token.value.empty())
00282                                 token.data = Tag::SECONDARY_HANDLE;
00283                         else
00284                                 token.data = Tag::PRIMARY_HANDLE;
00285                         
00286                         // is there a suffix?
00287                         if(canBeHandle && INPUT.peek() == Keys::Tag) {
00288                                 // eat the indicator
00289                                 INPUT.get();
00290                                 token.params.push_back(ScanTagSuffix(INPUT));
00291                                 token.data = Tag::NAMED_HANDLE;
00292                         }
00293                 }
00294 
00295                 m_tokens.push(token);
00296         }
00297 
00298         // PlainScalar
00299         void Scanner::ScanPlainScalar()
00300         {
00301                 std::string scalar;
00302 
00303                 // set up the scanning parameters
00304                 ScanScalarParams params;
00305                 params.end = (InFlowContext() ? Exp::EndScalarInFlow() : Exp::EndScalar()) || (Exp::BlankOrBreak() + Exp::Comment());
00306                 params.eatEnd = false;
00307                 params.indent = (InFlowContext() ? 0 : GetTopIndent() + 1);
00308                 params.fold = FOLD_FLOW;
00309                 params.eatLeadingWhitespace = true;
00310                 params.trimTrailingSpaces = true;
00311                 params.chomp = STRIP;
00312                 params.onDocIndicator = BREAK;
00313                 params.onTabInIndentation = THROW;
00314 
00315                 // insert a potential simple key
00316                 InsertPotentialSimpleKey();
00317 
00318                 Mark mark = INPUT.mark();
00319                 scalar = ScanScalar(INPUT, params);
00320 
00321                 // can have a simple key only if we ended the scalar by starting a new line
00322                 m_simpleKeyAllowed = params.leadingSpaces;
00323                 m_canBeJSONFlow = false;
00324 
00325                 // finally, check and see if we ended on an illegal character
00326                 //if(Exp::IllegalCharInScalar.Matches(INPUT))
00327                 //      throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_SCALAR);
00328 
00329                 Token token(Token::PLAIN_SCALAR, mark);
00330                 token.value = scalar;
00331                 m_tokens.push(token);
00332         }
00333 
00334         // QuotedScalar
00335         void Scanner::ScanQuotedScalar()
00336         {
00337                 std::string scalar;
00338 
00339                 // peek at single or double quote (don't eat because we need to preserve (for the time being) the input position)
00340                 char quote = INPUT.peek();
00341                 bool single = (quote == '\'');
00342 
00343                 // setup the scanning parameters
00344                 ScanScalarParams params;
00345                 params.end = (single ? RegEx(quote) && !Exp::EscSingleQuote() : RegEx(quote));
00346                 params.eatEnd = true;
00347                 params.escape = (single ? '\'' : '\\');
00348                 params.indent = 0;
00349                 params.fold = FOLD_FLOW;
00350                 params.eatLeadingWhitespace = true;
00351                 params.trimTrailingSpaces = false;
00352                 params.chomp = CLIP;
00353                 params.onDocIndicator = THROW;
00354 
00355                 // insert a potential simple key
00356                 InsertPotentialSimpleKey();
00357 
00358                 Mark mark = INPUT.mark();
00359 
00360                 // now eat that opening quote
00361                 INPUT.get();
00362                 
00363                 // and scan
00364                 scalar = ScanScalar(INPUT, params);
00365                 m_simpleKeyAllowed = false;
00366                 m_canBeJSONFlow = true;
00367 
00368                 Token token(Token::NON_PLAIN_SCALAR, mark);
00369                 token.value = scalar;
00370                 m_tokens.push(token);
00371         }
00372 
00373         // BlockScalarToken
00374         // . These need a little extra processing beforehand.
00375         // . We need to scan the line where the indicator is (this doesn't count as part of the scalar),
00376         //   and then we need to figure out what level of indentation we'll be using.
00377         void Scanner::ScanBlockScalar()
00378         {
00379                 std::string scalar;
00380 
00381                 ScanScalarParams params;
00382                 params.indent = 1;
00383                 params.detectIndent = true;
00384 
00385                 // eat block indicator ('|' or '>')
00386                 Mark mark = INPUT.mark();
00387                 char indicator = INPUT.get();
00388                 params.fold = (indicator == Keys::FoldedScalar ? FOLD_BLOCK : DONT_FOLD);
00389 
00390                 // eat chomping/indentation indicators
00391                 params.chomp = CLIP;
00392                 int n = Exp::Chomp().Match(INPUT);
00393                 for(int i=0;i<n;i++) {
00394                         char ch = INPUT.get();
00395                         if(ch == '+')
00396                                 params.chomp = KEEP;
00397                         else if(ch == '-')
00398                                 params.chomp = STRIP;
00399                         else if(Exp::Digit().Matches(ch)) {
00400                                 if(ch == '0')
00401                                         throw ParserException(INPUT.mark(), ErrorMsg::ZERO_INDENT_IN_BLOCK);
00402 
00403                                 params.indent = ch - '0';
00404                                 params.detectIndent = false;
00405                         }
00406                 }
00407 
00408                 // now eat whitespace
00409                 while(Exp::Blank().Matches(INPUT))
00410                         INPUT.eat(1);
00411 
00412                 // and comments to the end of the line
00413                 if(Exp::Comment().Matches(INPUT))
00414                         while(INPUT && !Exp::Break().Matches(INPUT))
00415                                 INPUT.eat(1);
00416 
00417                 // if it's not a line break, then we ran into a bad character inline
00418                 if(INPUT && !Exp::Break().Matches(INPUT))
00419                         throw ParserException(INPUT.mark(), ErrorMsg::CHAR_IN_BLOCK);
00420 
00421                 // set the initial indentation
00422                 if(GetTopIndent() >= 0)
00423                         params.indent += GetTopIndent();
00424 
00425                 params.eatLeadingWhitespace = false;
00426                 params.trimTrailingSpaces = false;
00427                 params.onTabInIndentation = THROW;
00428 
00429                 scalar = ScanScalar(INPUT, params);
00430 
00431                 // simple keys always ok after block scalars (since we're gonna start a new line anyways)
00432                 m_simpleKeyAllowed = true;
00433                 m_canBeJSONFlow = false;
00434 
00435                 Token token(Token::NON_PLAIN_SCALAR, mark);
00436                 token.value = scalar;
00437                 m_tokens.push(token);
00438         }
00439 }


libpointmatcher
Author(s):
autogenerated on Mon Sep 14 2015 02:59:06