scanner.cpp
Go to the documentation of this file.
1 #include "scanner.h"
2 #include "token.h"
4 #include "exp.h"
5 #include <cassert>
6 #include <memory>
7 
8 namespace YAML_PM
9 {
10  Scanner::Scanner(std::istream& in) // builds a scanner over the input stream `in`
11  : INPUT(in), m_startedStream(false), m_endedStream(false), m_simpleKeyAllowed(false), m_canBeJSONFlow(false) // INPUT is initialized from `in`; every state flag starts out false
12  { // nothing else to do here: no other setup is performed in the constructor body
13  }
14 
16  {
17  }
18 
19  // empty
20  // . Returns true if there are no more tokens to be read
22  {
24  return m_tokens.empty();
25  }
26 
27  // pop
28  // . Simply removes the next token on the queue.
29  void Scanner::pop()
30  { // NOTE(review): listing line 31 is absent from this extract - confirm against the original scanner.cpp
32  if(!m_tokens.empty()) // guard: calling pop() on an empty std::queue is undefined, so an empty queue is left untouched
33  m_tokens.pop(); // discards the front token without returning it
34  }
35 
36  // peek
37  // . Returns (but does not remove) the next token on the queue.
39  {
41  assert(!m_tokens.empty()); // should we be asserting here? I mean, we should really just be checking
42  // if it's empty before peeking.
43 
44 #if 0
45  static Token *pLast = 0;
46  if(pLast != &m_tokens.front())
47  std::cerr << "peek: " << m_tokens.front() << "\n";
48  pLast = &m_tokens.front();
49 #endif
50 
51  return m_tokens.front();
52  }
53 
54  // EnsureTokensInQueue
55  // . Scan until there's a valid token at the front of the queue,
56  // or we're sure the queue is empty.
58  {
59  while(1) {
60  if(!m_tokens.empty()) {
61  Token& token = m_tokens.front();
62 
63  // if this guy's valid, then we're done
64  if(token.status == Token::VALID)
65  return;
66 
67  // here's where we clean up the impossible tokens
68  if(token.status == Token::INVALID) {
69  m_tokens.pop();
70  continue;
71  }
72 
73  // note: what's left are the unverified tokens
74  }
75 
76  // no token? maybe we've actually finished
77  if(m_endedStream)
78  return;
79 
80  // no? then scan...
81  ScanNextToken();
82  }
83  }
84 
85  // ScanNextToken
86  // . The main scanning function; here we branch out and
87  // scan whatever the next token should be.
89  {
90  if(m_endedStream)
91  return;
92 
93  if(!m_startedStream)
94  return StartStream();
95 
96  // get rid of whitespace, etc. (in between tokens it should be irrelevant)
98 
99  // maybe need to end some blocks
100  PopIndentToHere();
101 
102  // *****
103  // And now branch based on the next few characters!
104  // *****
105 
106  // end of stream
107  if(!INPUT)
108  return EndStream();
109 
110  if(INPUT.column() == 0 && INPUT.peek() == Keys::Directive)
111  return ScanDirective();
112 
113  // document token
114  if(INPUT.column() == 0 && Exp::DocStart().Matches(INPUT))
115  return ScanDocStart();
116 
117  if(INPUT.column() == 0 && Exp::DocEnd().Matches(INPUT))
118  return ScanDocEnd();
119 
120  // flow start/end/entry
122  return ScanFlowStart();
123 
125  return ScanFlowEnd();
126 
127  if(INPUT.peek() == Keys::FlowEntry)
128  return ScanFlowEntry();
129 
130  // block/map stuff
132  return ScanBlockEntry();
133 
134  if((InBlockContext() ? Exp::Key() : Exp::KeyInFlow()).Matches(INPUT))
135  return ScanKey();
136 
138  return ScanValue();
139 
140  // alias/anchor
141  if(INPUT.peek() == Keys::Alias || INPUT.peek() == Keys::Anchor)
142  return ScanAnchorOrAlias();
143 
144  // tag
145  if(INPUT.peek() == Keys::Tag)
146  return ScanTag();
147 
148  // special scalars
150  return ScanBlockScalar();
151 
152  if(INPUT.peek() == '\'' || INPUT.peek() == '\"')
153  return ScanQuotedScalar();
154 
155  // plain scalars
157  return ScanPlainScalar();
158 
159  // don't know what it is!
161  }
162 
163  // ScanToNextToken
164  // . Eats input until we reach the next token-like thing.
166  {
167  while(1) {
168  // first eat whitespace
169  while(INPUT && IsWhitespaceToBeEaten(INPUT.peek())) {
171  m_simpleKeyAllowed = false;
172  INPUT.eat(1);
173  }
174 
175  // then eat a comment
176  if(Exp::Comment().Matches(INPUT)) {
177  // eat until line break
178  while(INPUT && !Exp::Break().Matches(INPUT))
179  INPUT.eat(1);
180  }
181 
182  // if it's NOT a line break, then we're done!
183  if(!Exp::Break().Matches(INPUT))
184  break;
185 
186  // otherwise, let's eat the line break and keep going
187  int n = Exp::Break().Match(INPUT);
188  INPUT.eat(n);
189 
190  // oh yeah, and let's get rid of that simple key
192 
193  // new line - we may be able to accept a simple key now
194  if(InBlockContext())
195  m_simpleKeyAllowed = true;
196  }
197  }
198 
200  // Misc. helpers
201 
202  // IsWhitespaceToBeEaten
203  // . We can eat whitespace if it's a space or tab
204  // . Note: originally tabs in block context couldn't be eaten
205  // "where a simple key could be allowed
206  // (i.e., not at the beginning of a line, or following '-', '?', or ':')"
207  // I think this is wrong, since tabs can be non-content whitespace; it's just
208  // that they can't contribute to indentation, so once you've seen a tab in a
209  // line, you can't start a simple key
211  {
212  if(ch == ' ')
213  return true;
214 
215  if(ch == '\t')
216  return true;
217 
218  return false;
219  }
220 
221  // GetValueRegex
222  // . Get the appropriate regex to check if it's a value token
224  {
225  if(InBlockContext())
226  return Exp::Value();
227 
229  }
230 
231  // StartStream
232  // . Set the initial conditions for starting a stream.
234  {
235  m_startedStream = true;
236  m_simpleKeyAllowed = true;
237  std::auto_ptr<IndentMarker> pIndent(new IndentMarker(-1, IndentMarker::NONE));
238  m_indentRefs.push_back(pIndent);
239  m_indents.push(&m_indentRefs.back());
240  }
241 
242  // EndStream
243  // . Close out the stream, finish up, etc.
245  {
246  // force newline
247  if(INPUT.column() > 0)
248  INPUT.ResetColumn();
249 
250  PopAllIndents();
252 
253  m_simpleKeyAllowed = false;
254  m_endedStream = true;
255  }
256 
258  {
259  m_tokens.push(Token(type, INPUT.mark()));
260  return &m_tokens.back();
261  }
262 
264  {
265  switch(type) {
268  case IndentMarker::NONE: assert(false); break;
269  }
270  assert(false);
271  throw std::runtime_error("yaml-cpp: internal error, invalid indent type");
272  }
273 
274  // PushIndentTo
275  // . Pushes an indentation onto the stack, and enqueues the
276  // proper token (sequence start or mapping start).
277  // . Returns the indent marker it generates (if any).
279  {
280  // are we in flow?
281  if(InFlowContext())
282  return 0;
283 
284  std::auto_ptr<IndentMarker> pIndent(new IndentMarker(column, type));
285  IndentMarker& indent = *pIndent;
286  const IndentMarker& lastIndent = *m_indents.top();
287 
288  // is this actually an indentation?
289  if(indent.column < lastIndent.column)
290  return 0;
291  if(indent.column == lastIndent.column && !(indent.type == IndentMarker::SEQ && lastIndent.type == IndentMarker::MAP))
292  return 0;
293 
294  // push a start token
295  indent.pStartToken = PushToken(GetStartTokenFor(type));
296 
297  // and then the indent
298  m_indents.push(&indent);
299  m_indentRefs.push_back(pIndent);
300  return &m_indentRefs.back();
301  }
302 
303  // PopIndentToHere
304  // . Pops indentations off the stack until we reach the current indentation level,
305  // and enqueues the proper token each time.
306  // . Then pops all invalid indentations off.
308  {
309  // are we in flow?
310  if(InFlowContext())
311  return;
312 
313  // now pop away
314  while(!m_indents.empty()) {
315  const IndentMarker& indent = *m_indents.top();
316  if(indent.column < INPUT.column())
317  break;
318  if(indent.column == INPUT.column() && !(indent.type == IndentMarker::SEQ && !Exp::BlockEntry().Matches(INPUT)))
319  break;
320 
321  PopIndent();
322  }
323 
324  while(!m_indents.empty() && m_indents.top()->status == IndentMarker::INVALID)
325  PopIndent();
326  }
327 
328  // PopAllIndents
329  // . Pops all indentations (except for the base empty one) off the stack,
330  // and enqueues the proper token each time.
332  {
333  // are we in flow?
334  if(InFlowContext())
335  return;
336 
337  // now pop away
338  while(!m_indents.empty()) {
339  const IndentMarker& indent = *m_indents.top();
340  if(indent.type == IndentMarker::NONE)
341  break;
342 
343  PopIndent();
344  }
345  }
346 
347  // PopIndent
348  // . Pops a single indent, pushing the proper token
350  {
351  const IndentMarker& indent = *m_indents.top();
352  m_indents.pop();
353 
354  if(indent.status != IndentMarker::VALID) {
356  return;
357  }
358 
359  if(indent.type == IndentMarker::SEQ)
361  else if(indent.type == IndentMarker::MAP)
363  }
364 
365  // GetTopIndent
367  {
368  if(m_indents.empty())
369  return 0;
370  return m_indents.top()->column;
371  }
372 
373  // ThrowParserException
374  // . Throws a ParserException with the current token location
375  // (if available).
376  // . Does not parse any more tokens.
378  {
379  Mark mark = Mark::null();
380  if(!m_tokens.empty()) {
381  const Token& token = m_tokens.front();
382  mark = token.mark;
383  }
384  throw ParserException(mark, msg);
385  }
386 }
387 
const char FoldedScalar
Definition: exp.h:190
const RegEx & BlockEntry()
Definition: exp.h:89
void ScanBlockScalar()
Definition: scantoken.cpp:377
const RegEx & Key()
Definition: exp.h:93
Stream INPUT
Definition: scanner.h:115
void ScanFlowStart()
Definition: scantoken.cpp:87
static const Mark null()
Definition: mark.h:16
std::queue< Token > m_tokens
Definition: scanner.h:118
const char FlowMapEnd
Definition: exp.h:184
std::stack< IndentMarker * > m_indents
Definition: scanner.h:125
void ScanDirective()
Definition: scantoken.cpp:17
void ScanPlainScalar()
Definition: scantoken.cpp:299
const char Alias
Definition: exp.h:186
IndentMarker * PushIndentTo(int column, IndentMarker::INDENT_TYPE type)
Definition: scanner.cpp:278
::std::string string
Definition: gtest.h:1979
void ScanFlowEntry()
Definition: scantoken.cpp:135
void ResetColumn()
Definition: stream.h:42
Token::TYPE GetStartTokenFor(IndentMarker::INDENT_TYPE type) const
Definition: scanner.cpp:263
void EndStream()
Definition: scanner.cpp:244
const RegEx & GetValueRegex() const
Definition: scanner.cpp:223
void PopIndent()
Definition: scanner.cpp:349
bool m_simpleKeyAllowed
Definition: scanner.h:122
void EnsureTokensInQueue()
Definition: scanner.cpp:57
bool InBlockContext() const
Definition: scanner.h:59
const char *const UNKNOWN_TOKEN
Definition: exceptions.h:40
bool IsWhitespaceToBeEaten(char ch)
Definition: scanner.cpp:210
int GetTopIndent() const
Definition: scanner.cpp:366
const RegEx & KeyInFlow()
Definition: exp.h:97
const RegEx & Break()
Definition: exp.h:34
void ScanToNextToken()
Definition: scanner.cpp:165
void PopAllIndents()
Definition: scanner.cpp:331
const RegEx & Value()
Definition: exp.h:101
void ScanBlockEntry()
Definition: scantoken.cpp:155
const char FlowSeqEnd
Definition: exp.h:182
void InvalidateSimpleKey()
Definition: simplekey.cpp:87
const RegEx & Tab()
Definition: exp.h:26
const char Anchor
Definition: exp.h:187
void eat(int n=1)
Definition: stream.cpp:272
const RegEx & ValueInJSONFlow()
Definition: exp.h:109
const RegEx & PlainScalarInFlow()
Definition: exp.h:143
int column() const
Definition: stream.h:41
Mark mark
Definition: token.h:78
void PopIndentToHere()
Definition: scanner.cpp:307
void ScanQuotedScalar()
Definition: scantoken.cpp:335
void ScanNextToken()
Definition: scanner.cpp:88
ptr_vector< IndentMarker > m_indentRefs
Definition: scanner.h:126
bool InFlowContext() const
Definition: scanner.h:58
int Match(const std::string &str) const
Definition: regeximpl.h:42
void ScanAnchorOrAlias()
Definition: scantoken.cpp:225
const RegEx & DocEnd()
Definition: exp.h:81
const RegEx & PlainScalar()
Definition: exp.h:139
bool m_canBeJSONFlow
Definition: scanner.h:123
Scanner(std::istream &in)
Definition: scanner.cpp:10
char peek() const
Definition: stream.cpp:228
void ThrowParserException(const std::string &msg) const
Definition: scanner.cpp:377
const char Tag
Definition: exp.h:188
const char FlowEntry
Definition: exp.h:185
const Mark mark() const
Definition: stream.h:38
void ScanDocStart()
Definition: scantoken.cpp:59
void StartStream()
Definition: scanner.cpp:233
const char Directive
Definition: exp.h:180
bool m_startedStream
Definition: scanner.h:121
void PopAllSimpleKeys()
Definition: simplekey.cpp:133
Token & peek()
Definition: scanner.cpp:38
Token * PushToken(Token::TYPE type)
Definition: scanner.cpp:257
const char FlowSeqStart
Definition: exp.h:181
PM::Matches Matches
bool m_endedStream
Definition: scanner.h:121
const RegEx & DocStart()
Definition: exp.h:77
const char FlowMapStart
Definition: exp.h:183
bool Matches(char ch) const
Definition: regeximpl.h:16
const char LiteralScalar
Definition: exp.h:189
const RegEx Comment()
Definition: exp.h:113
STATUS status
Definition: token.h:76
const RegEx & ValueInFlow()
Definition: exp.h:105


libpointmatcher
Author(s):
autogenerated on Sat May 27 2023 02:38:03