delimited.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2023 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_DSL_DELIMITED_HPP_INCLUDED
5 #define LEXY_DSL_DELIMITED_HPP_INCLUDED
6 
7 #include <lexy/_detail/swar.hpp>
8 #include <lexy/dsl/base.hpp>
9 #include <lexy/dsl/capture.hpp>
10 #include <lexy/dsl/char_class.hpp>
11 #include <lexy/dsl/literal.hpp>
12 #include <lexy/dsl/symbol.hpp>
13 #include <lexy/dsl/whitespace.hpp>
14 
15 namespace lexy
16 {
// Error tag `lexy::missing_delimiter`. The cross-reference index of this page describes it
// as: "The reader ends before the closing delimiter was found."
// NOTE(review): this listing jumps from line 16 to line 19, so the declaration line that
// names the struct is not shown here - confirm against the original header.
19 {
    // Returns the string literal that names this error.
20  static LEXY_CONSTEVAL auto name()
21  {
22  return "missing delimiter";
23  }
24 };
25 } // namespace lexy
26 
27 namespace lexyd
28 {
// Bookkeeping for the content of a delimited rule: `begin` marks where the current run of
// content characters started, `parse_swar()`/`parse_one()` advance the reader over content,
// and `finish()` reports the pending run once it ends.
29 template <typename CharClass, typename Reader>
30 struct _del_chars
31 {
    // Start of the content run that has not been reported yet.
32  typename Reader::iterator begin;
33 
    // Starts the first run at the reader's current position.
34  constexpr _del_chars(const Reader& reader) : begin(reader.position()) {}
35 
    // Reports an invalid content character and recovers from it.
    // The char class error is reported at `recover_begin`, the range
    // [recover_begin, recover_end) is emitted as an error token between the recovery
    // start/finish events, and the next content run is started at `recover_end`.
36  template <typename Context>
37  constexpr void _recover(Context& context, typename Reader::iterator recover_begin,
38  typename Reader::iterator recover_end)
39  {
40  CharClass::template char_class_report_error<Reader>(context, recover_begin);
41 
42  // We recover by discarding the ASCII character.
43  // (We've checked for EOF before, so that's not the error.)
44  context.on(_ev::recovery_start{}, recover_begin);
45  context.on(_ev::token{}, lexy::error_token_kind, recover_begin, recover_end);
46  context.on(_ev::recovery_finish{}, recover_end);
47 
48  // Restart the next character here.
49  begin = recover_end;
50  }
51 
    // Fast path: advances the reader one SWAR word at a time for as long as the word cannot
    // contain the end of the content and fully matches the char class.
    // It does nothing unless the `if constexpr` condition below holds.
    // The `Close` and `Escs...` arguments are unnamed; only their types are used.
52  template <typename Close, typename... Escs>
53  constexpr void parse_swar(Reader& reader, Close, Escs...)
54  {
55  using encoding = typename Reader::encoding;
56 
57  // If we have a SWAR reader and the Close and Escape chars are literal rules,
58  // we can munch as much content as possible in a fast loop.
59  // We also need to efficiently check for the CharClass for it to make sense.
60  if constexpr (lexy::_detail::is_swar_reader<Reader> //
61  && (lexy::is_literal_rule<Close> && ... && Escs::esc_is_literal)
62  && !std::is_same_v<
63  decltype(CharClass::template char_class_match_swar<encoding>({})),
64  std::false_type>)
65  {
66  using char_type = typename encoding::char_type;
    // NOTE(review): listing line 67 is not shown. `swar_has_char` is used unqualified
    // below, so it presumably brings `lexy::_detail` into scope - confirm.
68 
69  while (true)
70  {
71  auto cur = reader.peek_swar();
72 
73  // If we have an EOF or the initial character of the closing delimiter, we exit as
74  // we have no more content.
    // NOTE(review): listing line 75 is not shown. It presumably opens the `if` with the
    // EOF check that the condition on line 76 continues - confirm.
76  || swar_has_char<char_type, Close::template lit_first_char<encoding>()>(cur))
77  break;
78 
79  // The same is true if we have the escape character.
80  if constexpr (sizeof...(Escs) > 0)
81  {
82  if ((swar_has_char<char_type, Escs::template esc_first_char<encoding>()>(cur)
83  || ...))
84  break;
85  }
86 
87  // We definitely don't have the end of the delimited content in the current SWAR,
88  // check if they all follow the char class.
89  if (!CharClass::template char_class_match_swar<encoding>(cur))
90  // They don't or we need to look closer, exit the loop.
91  break;
92 
93  reader.bump_swar();
94  }
95  }
96  }
97 
    // Consumes one content character.
    // A character that does not match `CharClass` is still consumed: the pending run is
    // finished in front of it and `_recover()` reports and discards it.
98  // Precondition: the next code unit definitely belongs to the content, not the delimiter.
99  template <typename Context, typename Sink>
100  constexpr void parse_one(Context& context, Reader& reader, Sink& sink)
101  {
102  using encoding = typename Reader::encoding;
103 
104  // First try to match the ASCII characters.
    // NOTE(review): listing line 105, which must define `matcher`, is not shown. The page
    // index references `lexy::_detail::ascii_set_matcher`, so it is presumably built from
    // that - confirm.
106  if (matcher::template match<encoding>(reader.peek()))
107  {
108  reader.bump();
109  }
    // Taken only when the char class can match code points at all, i.e.
    // `char_class_match_cp` does not return `std::false_type`.
110  else if constexpr (!std::is_same_v<decltype(CharClass::char_class_match_cp(char32_t())),
111  std::false_type>)
112  {
113  // Try to match any code point in default_encoding or byte_encoding.
114  if constexpr (std::is_same_v<encoding, lexy::default_encoding> //
115  || std::is_same_v<encoding, lexy::byte_encoding>)
116  {
117  static_assert(!CharClass::char_class_unicode(),
118  "cannot use this character class with default/byte_encoding");
119  LEXY_ASSERT(reader.peek() != encoding::eof(),
120  "EOF should be checked before calling this");
121 
    // The single code unit is converted to a code point and consumed before it is checked.
122  auto recover_begin = reader.position();
123  auto cp = static_cast<char32_t>(reader.peek());
124  reader.bump();
125 
126  if (!CharClass::char_class_match_cp(cp))
127  {
128  finish(context, sink, recover_begin);
129  _recover(context, recover_begin, reader.position());
130  }
131  }
132  // Otherwise, try to match Unicode characters.
133  else
134  {
135  static_assert(CharClass::char_class_unicode(),
136  "cannot use this character class with Unicode encoding");
137 
    // `parse_code_point` takes the reader by value (see the page index), so the reader is
    // only advanced by the explicit `set_position` calls below.
138  auto result = lexy::_detail::parse_code_point(reader);
139  if (result.error == lexy::_detail::cp_error::success
140  && CharClass::char_class_match_cp(result.cp))
141  {
142  reader.set_position(result.end);
143  }
144  else
145  {
    // Decoding failed or the code point is not in the char class: report everything up to
    // `result.end` as the erroneous range and continue behind it.
146  finish(context, sink, reader.position());
147  _recover(context, reader.position(), result.end);
148  reader.set_position(result.end);
149  }
150  }
151  }
152  // It doesn't match Unicode characters.
153  else
154  {
155  // We can just discard the invalid ASCII character.
156  LEXY_ASSERT(reader.peek() != encoding::eof(),
157  "EOF should be checked before calling this");
158  auto recover_begin = reader.position();
159  reader.bump();
160  auto recover_end = reader.position();
161 
162  finish(context, sink, recover_begin);
163  _recover(context, recover_begin, recover_end);
164  }
165  }
166 
    // Reports the pending run [begin, end) as a token of the char class's token type.
    // An empty run is skipped. `begin` is not modified here; callers reset it themselves.
167  template <typename Context, typename Sink>
168  constexpr void finish(Context& context, Sink& sink, typename Reader::iterator end)
169  {
170  if (begin == end)
171  return;
172 
173  context.on(_ev::token{}, typename CharClass::token_type{}, begin, end);
    // NOTE(review): listing line 174 is not shown and `sink` is unused in the visible
    // lines. The page index references `lexy::lexeme`, so the run is presumably also
    // passed to the sink there - confirm.
175  }
176 };
177 
// Limit policy `lexyd::_del_limit` (named in the page index; the `struct` line 179 is not
// shown in this listing - confirm against the original header).
// `Token` is the limit token and `Error` is the error tag, which defaults to
// `lexy::missing_delimiter`.
178 template <typename Token, typename Error = lexy::missing_delimiter>
180 {
181  using error = Error;
182 
    // Returns true if `Token` matches at the reader's position or the reader is at EOF.
    // The reader is taken by value, so the caller's reader is not advanced.
183  template <typename Reader>
184  static constexpr bool peek(Reader reader)
185  {
186  return lexy::try_match_token(Token{}, reader) || reader.peek() == Reader::encoding::eof();
187  }
188 };
// Specialization for the case without a limit token (`void`): only EOF counts as limit.
189 template <typename Error>
190 struct _del_limit<void, Error>
191 {
192  using error = Error;
193 
    // Returns true if the reader is at EOF. The reader is taken by value.
194  template <typename Reader>
195  static constexpr bool peek(Reader reader)
196  {
197  return reader.peek() == Reader::encoding::eof();
198  }
199 };
200 
// Rule that parses the delimited content followed by the closing delimiter `Close`.
// `Char` is the char class of the content, `Limit` selects how a missing delimiter is
// detected, and `Escapes...` are the escape sequences recognised inside the content.
201 template <typename Close, typename Char, typename Limit, typename... Escapes>
202 struct _del : rule_base
203 {
    // NOTE(review): the continuation of this alias (listing line 205) is not shown. The page
    // index gives the full type as
    // `std::conditional_t<std::is_void_v<Limit> || lexy::is_token_rule<Limit>, _del_limit<Limit>, Limit>`.
204  using _limit = std::conditional_t<std::is_void_v<Limit> || lexy::is_token_rule<Limit>,
206 
    // Consumes content until the closing delimiter has been matched.
    // Returns true once `close.try_parse` succeeds. Returns false after raising
    // `_limit::error` over [del_begin, end) when `_limit::peek` fires first.
    // In both cases the pending content run is finished before returning.
207  template <typename CloseParser, typename Context, typename Reader, typename Sink>
208  LEXY_PARSER_FUNC static bool _loop(CloseParser& close, Context& context, Reader& reader,
209  Sink& sink)
210  {
211  auto del_begin = reader.position();
212  _del_chars<Char, Reader> cur_chars(reader);
213  while (true)
214  {
215  // Parse as many content chars as possible.
216  // If it returns, we need to look closer at the next char.
217  cur_chars.parse_swar(reader, Close{}, Escapes{}...);
218 
219  // Check for closing delimiter.
220  if (close.try_parse(context.control_block, reader))
221  break;
222  close.cancel(context);
223 
224  // Check for missing delimiter.
225  if (_limit::peek(reader))
226  {
227  // We're done, so finish the current characters.
228  auto end = reader.position();
229  cur_chars.finish(context, sink, end);
230 
231  auto err = lexy::error<Reader, typename _limit::error>(del_begin, end);
232  context.on(_ev::error{}, err);
233  return false;
234  }
235 
236  // Check for escape sequences.
237  if ((Escapes::esc_try_parse(context, reader, sink, cur_chars) || ...))
238  // We had an escape sequence, so do nothing in this iteration.
239  continue;
240 
241  // It is actually a content char, consume it.
242  cur_chars.parse_one(context, reader, sink);
243  }
244 
245  // Finish the currently active character sequence.
246  cur_chars.finish(context, sink, reader.position());
247  return true;
248  }
249 
    // Parser for this rule; continues with `NextParser` after the closing delimiter.
250  template <typename NextParser>
251  struct p
252  {
253  template <typename Context, typename Reader, typename... Args>
254  LEXY_PARSER_FUNC static bool parse(Context& context, Reader& reader, Args&&... args)
255  {
    // The content is collected in a sink obtained from the context's value callback.
256  auto sink = context.value_callback().sink();
257 
258  // Parse characters until we have the closing delimiter.
    // NOTE(review): listing line 259, which must declare `close`, is not shown. It is
    // presumably a branch parser for `Close` - confirm.
260  if (!_loop(close, context, reader, sink))
261  return false;
262 
263  // We're done, finish the sink and then the closing delimiter.
    // A sink with `void` return type contributes no value; otherwise the result of
    // finishing the sink is appended to the arguments passed on.
264  if constexpr (std::is_same_v<typename decltype(sink)::return_type, void>)
265  {
266  LEXY_MOV(sink).finish();
267  return close.template finish<NextParser>(context, reader, LEXY_FWD(args)...);
268  }
269  else
270  {
271  return close.template finish<NextParser>(context, reader, LEXY_FWD(args)...,
272  LEXY_MOV(sink).finish());
273  }
274  }
275  };
276 };
277 
// Empty tag type `lexyd::_escape_base` (named in the page index; the declaration line 278
// is not shown in this listing - confirm against the original header).
// `_delim_dsl::operator()` checks its escape arguments against it with `std::is_base_of_v`.
279 {};
280 
// DSL object `lexyd::_delim_dsl` returned by `delimited()` (named in the page index; the
// `struct` line 282 is not shown in this listing - confirm against the original header).
// `Open` and `Close` are the delimiter rules; `Limit` is `void` until `limit()` was called.
281 template <typename Open, typename Close, typename Limit = void>
283 {
    // Page index: "Add char classes that will limit the delimited to detect a missing
    // terminator." May only be called while no limit is set (`Limit` is `void`), and only
    // with a char class rule.
    // NOTE(review): the return statement (listing line 290) is not shown.
285  template <typename LimitCharClass>
286  constexpr auto limit(LimitCharClass) const
287  {
288  static_assert(std::is_void_v<Limit> && lexy::is_char_class_rule<LimitCharClass>);
289 
291  }
    // Page index: "Add char classes that will limit the delimited and specify the error."
    // Same preconditions as above; `Error` has to be given explicitly by the caller.
    // NOTE(review): the return statement (listing line 297) is not shown.
293  template <typename Error, typename LimitCharClass>
294  constexpr auto limit(LimitCharClass) const
295  {
296  static_assert(std::is_void_v<Limit> && lexy::is_char_class_rule<LimitCharClass>);
298  }
299 
300  //=== rules ===//
    // Page index: "Sets the content." `Char` must be a char class rule and every argument in
    // `Escapes...` must derive from `_escape_base`.
    // NOTE(review): the return statement (listing line 307) is not shown. The page index
    // references `lexyd::no_whitespace`, which is presumably used there - confirm.
302  template <typename Char, typename... Escapes>
303  constexpr auto operator()(Char, Escapes...) const
304  {
305  static_assert(lexy::is_char_class_rule<Char>);
306  static_assert((std::is_base_of_v<_escape_base, Escapes> && ...));
308  }
309 
310  //=== access ===//
    // Page index: "Matches the open delimiter." Returns a default-constructed `Open`.
312  constexpr auto open() const
313  {
314  return Open{};
315  }
    // Page index: "Matches the closing delimiter." Returns a default-constructed `Close`.
317  constexpr auto close() const
318  {
319  // Close never has any whitespace.
320  return Close{};
321  }
322 };
323 
// Page index: "Parses everything between the two delimiters and captures it."
// Creates the DSL object for distinct opening and closing delimiters.
// Both arguments must be branch rules; only their types are used.
325 template <typename Open, typename Close>
326 constexpr auto delimited(Open, Close)
327 {
328  static_assert(lexy::is_branch_rule<Open> && lexy::is_branch_rule<Close>);
329  return _delim_dsl<Open, Close>{};
330 }
331 
// Single-argument overload: the same branch rule `Delim` is used as both the opening and
// the closing delimiter.
333 template <typename Delim>
334 constexpr auto delimited(Delim)
335 {
336  static_assert(lexy::is_branch_rule<Delim>);
337  return _delim_dsl<Delim, Delim>{};
338 }
339 
// Predefined delimiters. Each one is built with the single-argument `delimited()`, so the
// given literal is used as both the opening and the closing delimiter.
// Double quotes: one (`"`) and three (`"""`).
340 constexpr auto quoted = delimited(LEXY_LIT("\""));
341 constexpr auto triple_quoted = delimited(LEXY_LIT("\"\"\""));
342 
// Single quote (`'`).
343 constexpr auto single_quoted = delimited(LEXY_LIT("'"));
344 
// Backticks: one, two and three.
345 constexpr auto backticked = delimited(LEXY_LIT("`"));
346 constexpr auto double_backticked = delimited(LEXY_LIT("``"));
347 constexpr auto triple_backticked = delimited(LEXY_LIT("```"));
348 } // namespace lexyd
349 
350 namespace lexy
351 {
// Error tag `lexy::invalid_escape_sequence` (named in the page index; the declaration line
// 352 is not shown in this listing - confirm against the original header).
// `_escape::esc_try_parse` raises it when no branch matches after the escape token.
353 {
    // Returns the string literal that names this error.
354  static LEXY_CONSTEVAL auto name()
355  {
356  return "invalid escape sequence";
357  }
358 };
359 } // namespace lexy
360 
361 namespace lexyd
362 {
// Escape sequence description `lexyd::_escape` (named in the page index; the `struct` line
// 364 is not shown in this listing - confirm against the original header).
// `Escape` is the rule that starts an escape sequence; `Branches...` are the alternatives
// tried, in order, after it.
363 template <typename Escape, typename... Branches>
365 {
    // Whether `Escape` is a literal rule; `_del_chars::parse_swar` requires this for its
    // fast path.
366  static constexpr bool esc_is_literal = lexy::is_literal_rule<Escape>;
    // First character of the escape literal in the given encoding.
367  template <typename Encoding>
368  static constexpr auto esc_first_char() -> typename Encoding::char_type
369  {
370  return Escape::template lit_first_char<Encoding>();
371  }
372 
    // Tries to parse one escape sequence at the reader's position.
    // Returns false if the escape token does not match. Otherwise returns true, even when
    // no branch matched afterwards; that case raises `lexy::invalid_escape_sequence`.
    // On success the pending run in `cur_chars` is finished in front of the escape token and
    // a new run is started behind the escape sequence.
373  template <typename Context, typename Reader, typename Sink, typename Char>
374  static constexpr bool esc_try_parse(Context& context, Reader& reader, Sink& sink,
375  _del_chars<Char, Reader>& cur_chars)
376  {
377  auto begin = reader.position();
378 
379  // Check whether we're having the initial escape character.
    // NOTE(review): listing line 380, which must declare `token`, is not shown. It is
    // presumably a token parser for `Escape` - confirm.
381  if (!token.try_parse(context.control_block, reader))
382  // No need to call `.cancel()`; it's a token.
383  return false;
384 
385  // We do, so finish current character sequence and consume the escape token.
386  cur_chars.finish(context, sink, begin);
387  // It's a token, so this can't fail.
388  token.template finish<lexy::pattern_parser<>>(context, reader);
389 
390  // Try to parse the correct branch.
391  auto try_parse_branch = [&](auto branch) {
392  lexy::branch_parser_for<decltype(branch), Reader> parser{};
393  if (!parser.try_parse(context.control_block, reader))
394  {
395  parser.cancel(context);
396  return false;
397  }
398 
399  // This might fail, but we don't care:
400  // it will definitely consume the escape token, and everything that is a valid prefix.
401  // The remaining stuff is then just treated as part of the delimited.
402  parser.template finish<lexy::sink_parser>(context, reader, sink);
403  return true;
404  };
    // The fold over `||` stops at the first branch that matches.
405  auto found = (try_parse_branch(Branches{}) || ...);
406 
407  if constexpr ((lexy::is_unconditional_branch_rule<Branches> || ...))
408  {
409  LEXY_ASSERT(found, "there is an unconditional branch");
410  }
411  else if (!found)
412  {
413  // We haven't found any branch of the escape sequence.
414  auto err = lexy::error<Reader, lexy::invalid_escape_sequence>(begin, reader.position());
415  context.on(_ev::error{}, err);
416  }
417 
418  // Restart the current character sequence after the escape sequence.
419  cur_chars.begin = reader.position();
420  return true;
421  }
422 
    // Page index: "Adds a generic escape rule." Returns a new `_escape` with `Branch`
    // appended after the existing branches; `Branch` must be a branch rule.
424  template <typename Branch>
425  constexpr auto rule(Branch) const
426  {
427  static_assert(lexy::is_branch_rule<Branch>);
428  return _escape<Escape, Branches..., Branch>{};
429  }
430 
    // Page index: "Adds an escape rule that captures the branch." Wraps `branch` in
    // `lexy::dsl::capture` and adds it via `rule()`.
432  template <typename Branch>
433  constexpr auto capture(Branch branch) const
434  {
435  static_assert(lexy::is_branch_rule<Branch>);
436  return this->rule(lexy::dsl::capture(branch));
437  }
438 
    // Page index: "Adds an escape rule that parses the symbol." Adds
    // `lexyd::symbol<Table>(rule)` via `rule()`.
440  template <const auto& Table, typename Rule>
441  constexpr auto symbol(Rule rule) const
442  {
443  return this->rule(lexyd::symbol<Table>(rule));
444  }
    // Overload without a rule argument: adds `lexyd::symbol<Table>` itself via `rule()`.
445  template <const auto& Table>
446  constexpr auto symbol() const
447  {
448  return this->rule(lexyd::symbol<Table>);
449  }
450 };
451 
// Creates an escape sequence description that starts with `EscapeToken` and has no branches
// yet. The argument must be a token rule; only its type is used.
455 template <typename EscapeToken>
456 constexpr auto escape(EscapeToken)
457 {
458  static_assert(lexy::is_token_rule<EscapeToken>);
459  return _escape<EscapeToken>{};
460 }
461 
// Predefined escape sequence starts: a backslash (`\`) and a dollar sign (`$`).
// Neither has any branches yet.
462 constexpr auto backslash_escape = escape(lit_c<'\\'>);
463 constexpr auto dollar_escape = escape(lit_c<'$'>);
464 } // namespace lexyd
465 
466 #endif // LEXY_DSL_DELIMITED_HPP_INCLUDED
467 
lexyd::_delim_dsl::operator()
constexpr auto operator()(Char, Escapes...) const
Sets the content.
Definition: delimited.hpp:303
lexyd::position
constexpr auto position
Produces an iterator to the current reader position without parsing anything.
Definition: position.hpp:79
LEXY_MOV
#define LEXY_MOV(...)
Definition: config.hpp:21
lexy::invalid_escape_sequence::name
static LEXY_CONSTEVAL auto name()
Definition: delimited.hpp:354
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:90
lexyd::token
constexpr auto token(Rule)
Turns the arbitrary rule into a token by matching it without producing any values.
Definition: dsl/token.hpp:214
lexy::parse_events::recovery_start
Definition: dsl/base.hpp:61
lexyd::_delim_dsl::limit
constexpr auto limit(LimitCharClass) const
Add char classes that will limit the delimited and specify the error.
Definition: delimited.hpp:294
lexyd::peek
constexpr auto peek(Rule)
Definition: peek.hpp:166
lexy::_detail::ascii_set_matcher
Definition: char_class.hpp:139
lexyd::quoted
constexpr auto quoted
Definition: delimited.hpp:340
lexyd::dollar_escape
constexpr auto dollar_escape
Definition: delimited.hpp:463
symbol.hpp
literal.hpp
LEXY_LIT
#define LEXY_LIT(Str)
Definition: literal.hpp:390
lexyd::escape
constexpr auto escape(EscapeToken)
Definition: delimited.hpp:456
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexyd::delimited
constexpr auto delimited(Open, Close)
Parses everything between the two delimiters and captures it.
Definition: delimited.hpp:326
lexyd::_escape::capture
constexpr auto capture(Branch branch) const
Adds an escape rule that captures the branch.
Definition: delimited.hpp:433
lexy::_detail::parse_code_point
constexpr cp_result< Reader > parse_code_point(Reader reader)
Definition: _detail/code_point.hpp:142
lexy::branch_parser_for
typename BranchRule::template bp< Reader > branch_parser_for
Definition: dsl/base.hpp:103
lexyd::_del_chars::_del_chars
constexpr _del_chars(const Reader &reader)
Definition: delimited.hpp:34
lexyd::_delim_dsl::limit
constexpr auto limit(LimitCharClass) const
Add char classes that will limit the delimited to detect a missing terminator.
Definition: delimited.hpp:286
LEXY_FWD
#define LEXY_FWD(...)
Definition: config.hpp:22
lexyd::no_whitespace
constexpr auto no_whitespace(Rule)
Disables automatic skipping of whitespace for all tokens of the given rule.
Definition: whitespace.hpp:308
lexyd::_del::_loop
static LEXY_PARSER_FUNC bool _loop(CloseParser &close, Context &context, Reader &reader, Sink &sink)
Definition: delimited.hpp:208
lexyd::_escape::rule
constexpr auto rule(Branch) const
Adds a generic escape rule.
Definition: delimited.hpp:425
lexy
Definition: any_ref.hpp:12
lexyd::_del_limit::error
Error error
Definition: delimited.hpp:181
cx::end
constexpr auto end(const C &c) -> decltype(c.end())
Definition: wildcards.hpp:686
char_class.hpp
detail::void
j template void())
Definition: json.hpp:4893
lexy::_detail::swar_has_char
constexpr bool swar_has_char(swar_int v)
Definition: swar.hpp:177
lexyd::_del_limit< void, Error >::error
Error error
Definition: delimited.hpp:192
lexyd::_del_chars
Definition: delimited.hpp:30
lexy::error
Generic failure.
Definition: error.hpp:14
lexyd::_escape::esc_try_parse
static constexpr bool esc_try_parse(Context &context, Reader &reader, Sink &sink, _del_chars< Char, Reader > &cur_chars)
Definition: delimited.hpp:374
lexyd::_escape::symbol
constexpr auto symbol(Rule rule) const
Adds an escape rule that parses the symbol.
Definition: delimited.hpp:441
lexyd::single_quoted
constexpr auto single_quoted
Definition: delimited.hpp:343
lexyd::_del::p
Definition: delimited.hpp:251
lexyd::_del_chars::parse_swar
constexpr void parse_swar(Reader &reader, Close, Escs...)
Definition: delimited.hpp:53
lexyd::_del_limit
Definition: delimited.hpp:179
lexyd::_delim_dsl::close
constexpr auto close() const
Matches the closing delimiter.
Definition: delimited.hpp:317
swar.hpp
lexyd::_del_chars::finish
constexpr void finish(Context &context, Sink &sink, typename Reader::iterator end)
Definition: delimited.hpp:168
capture.hpp
lexy::parse_events::error
Definition: dsl/base.hpp:55
lexy::try_match_token
constexpr LEXY_FORCE_INLINE auto try_match_token(TokenRule, Reader &reader)
Definition: dsl/base.hpp:232
lexyd::_escape::esc_is_literal
static constexpr bool esc_is_literal
Definition: delimited.hpp:366
lexyd::_del_chars::begin
Reader::iterator begin
Definition: delimited.hpp:32
lexyd::_del_limit< void, Error >::peek
static constexpr bool peek(Reader reader)
Definition: delimited.hpp:195
lexyd::_delim_dsl
Definition: delimited.hpp:282
lexyd::_escape::esc_first_char
static constexpr auto esc_first_char() -> typename Encoding::char_type
Definition: delimited.hpp:368
lexyd::rule_base
Definition: grammar.hpp:17
lexy::missing_delimiter::name
static LEXY_CONSTEVAL auto name()
Definition: delimited.hpp:20
lexy::error_token_kind
@ error_token_kind
Definition: grammar.hpp:77
lexyd::backslash_escape
constexpr auto backslash_escape
Definition: delimited.hpp:462
lexy::parse_events::recovery_finish
Definition: dsl/base.hpp:66
lexyd::triple_backticked
constexpr auto triple_backticked
Definition: delimited.hpp:347
LEXY_PARSER_FUNC
#define LEXY_PARSER_FUNC
Definition: dsl/base.hpp:95
cx::begin
constexpr auto begin(const C &c) -> decltype(c.begin())
Definition: wildcards.hpp:661
lexyd::backticked
constexpr auto backticked
Definition: delimited.hpp:345
whitespace.hpp
base.hpp
lexyd::double_backticked
constexpr auto double_backticked
Definition: delimited.hpp:346
lexyd::_del::_limit
std::conditional_t< std::is_void_v< Limit >||lexy::is_token_rule< Limit >, _del_limit< Limit >, Limit > _limit
Definition: delimited.hpp:205
lexyd::_del::p::parse
static LEXY_PARSER_FUNC bool parse(Context &context, Reader &reader, Args &&... args)
Definition: delimited.hpp:254
lexy::parse_events::token
Definition: dsl/base.hpp:44
lexy::lexeme
Definition: lexeme.hpp:16
lexy::_detail::cp_error::success
@ success
lexyd::_del_limit::peek
static constexpr bool peek(Reader reader)
Definition: delimited.hpp:184
lexyd::_del
Definition: delimited.hpp:202
lexyd::_escape::symbol
constexpr auto symbol() const
Definition: delimited.hpp:446
lexyd::_del_chars::_recover
constexpr void _recover(Context &context, typename Reader::iterator recover_begin, typename Reader::iterator recover_end)
Definition: delimited.hpp:37
lexyd
Definition: trace.hpp:22
lexyd::triple_quoted
constexpr auto triple_quoted
Definition: delimited.hpp:341
lexyd::capture
constexpr auto capture(Token)
Captures whatever the token matches as a lexeme; does not include trailing whitespace.
Definition: capture.hpp:127
lexyd::eof
constexpr auto eof
Matches EOF.
Definition: eof.hpp:72
LEXY_ASSERT
#define LEXY_ASSERT(Expr, Msg)
Definition: assert.hpp:37
lexy::missing_delimiter
The reader ends before the closing delimiter was found.
Definition: delimited.hpp:18
lexyd::_escape
Definition: delimited.hpp:364
lexy::invalid_escape_sequence
Definition: delimited.hpp:352
lexyd::_escape_base
Definition: delimited.hpp:278
lexyd::_delim_dsl::open
constexpr auto open() const
Matches the open delimiter.
Definition: delimited.hpp:312
lexyd::_del_chars::parse_one
constexpr void parse_one(Context &context, Reader &reader, Sink &sink)
Definition: delimited.hpp:100


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Jun 28 2024 02:20:07