delimited.hpp
Go to the documentation of this file.
1 // Copyright (C) 2020-2024 Jonathan Müller and lexy contributors
2 // SPDX-License-Identifier: BSL-1.0
3 
4 #ifndef LEXY_DSL_DELIMITED_HPP_INCLUDED
5 #define LEXY_DSL_DELIMITED_HPP_INCLUDED
6 
7 #include <lexy/_detail/swar.hpp>
8 #include <lexy/dsl/base.hpp>
9 #include <lexy/dsl/capture.hpp>
10 #include <lexy/dsl/char_class.hpp>
11 #include <lexy/dsl/literal.hpp>
12 #include <lexy/dsl/symbol.hpp>
13 #include <lexy/dsl/whitespace.hpp>
14 
15 namespace lexy
16 {
// Error tag `lexy::missing_delimiter`: the reader ends before the closing delimiter was found.
// It is the default `Error` of `_del_limit` further down in this file.
// NOTE(review): listing lines 17-18 (presumably the doc comment and the `struct` head) are
// absent from this rendering; only the body survived.
19 {
// Returns the fixed label "missing delimiter" for this error tag.
20  static LEXY_CONSTEVAL auto name()
21  {
22  return "missing delimiter";
23  }
24 };
25 } // namespace lexy
26 
27 namespace lexyd
28 {
// Bookkeeping for one run of plain content characters inside a delimited rule.
//
// `begin` is the start of the run that has not been reported yet. `finish()` reports
// [begin, end) as a token of `CharClass::token_type`; `_recover()` (and the escape handling
// in `_escape::esc_try_parse`) move `begin` forward so the next run starts after the
// discarded or escaped input.
29 template <typename CharClass, typename Reader>
30 struct _del_chars
31 {
// Start of the current, not yet reported, run of content characters.
32  typename Reader::iterator begin;
33 
// Starts the first run at the reader's current position.
34  constexpr _del_chars(const Reader& reader) : begin(reader.position()) {}
35 
// Reports a char class error at `recover_begin`, then recovers by emitting
// [recover_begin, recover_end) as an error token and restarting the run at `recover_end`.
36  template <typename Context>
37  constexpr void _recover(Context& context, typename Reader::iterator recover_begin,
38  typename Reader::iterator recover_end)
39  {
40  CharClass::template char_class_report_error<Reader>(context, recover_begin);
41 
42  // We recover by discarding the offending input in [recover_begin, recover_end).
43  // (We've checked for EOF before, so that's not the error.)
44  context.on(_ev::recovery_start{}, recover_begin);
45  context.on(_ev::token{}, lexy::error_token_kind, recover_begin, recover_end);
46  context.on(_ev::recovery_finish{}, recover_end);
47 
48  // Restart the next character run after the discarded input.
49  begin = recover_end;
50  }
51 
// Fast path: advances `reader` one SWAR chunk at a time while the chunk contains no
// first character of the closing delimiter, no first character of an escape, and
// passes `CharClass::char_class_match_swar`. Compiles to a no-op when the `if constexpr`
// condition below does not hold. `Close` and `Escs` are only used for their types.
52  template <typename Close, typename... Escs>
53  constexpr void parse_swar(Reader& reader, Close, Escs...)
54  {
55  using encoding = typename Reader::encoding;
56 
57  // If we have a SWAR reader and the Close and Escape chars are literal rules,
58  // we can munch as much content as possible in a fast loop.
59  // We also need to efficiently check for the CharClass for it to make sense.
60  if constexpr (lexy::_detail::is_swar_reader<Reader> //
61  && (lexy::is_literal_rule<Close> && ... && Escs::esc_is_literal)
62  && !std::is_same_v<
63  decltype(CharClass::template char_class_match_swar<encoding>({})),
64  std::false_type>)
65  {
66  using char_type = typename encoding::char_type;
// NOTE(review): listing line 67 is missing from this rendering. `swar_has_char` is used
// unqualified below, so it presumably brings `lexy::_detail` into scope - confirm upstream.
68 
69  while (true)
70  {
71  auto cur = reader.peek_swar();
72 
73  // If we have an EOF or the initial character of the closing delimiter, we exit as
74  // we have no more content.
// NOTE(review): listing line 75 (the opening `if (` with the EOF test described above)
// is missing from this rendering; the next line is the continuation of that condition.
76  || swar_has_char<char_type, Close::template lit_first_char<encoding>()>(cur))
77  break;
78 
79  // The same is true if we have the escape character.
80  if constexpr (sizeof...(Escs) > 0)
81  {
82  if ((swar_has_char<char_type, Escs::template esc_first_char<encoding>()>(cur)
83  || ...))
84  break;
85  }
86 
87  // We definitely don't have the end of the delimited content in the current SWAR,
88  // check if they all follow the char class.
89  if (!CharClass::template char_class_match_swar<encoding>(cur))
90  // They don't or we need to look closer, exit the loop.
91  break;
92 
// The whole chunk is plain content, consume it.
93  reader.bump_swar();
94  }
95  }
96  }
97 
// Consumes exactly one content character (or one invalid unit, with error recovery).
// The work is selected at compile time: a `matcher` fast path first, then either code point
// matching (when `CharClass::char_class_match_cp` does not return `std::false_type`) or
// unconditional discard-and-recover.
98  // Precondition: the next code unit definitely belongs to the content, not the delimiter.
99  template <typename Context, typename Sink>
100  constexpr void parse_one(Context& context, Reader& reader, Sink& sink)
101  {
102  using encoding = typename Reader::encoding;
103 
104  // First try to match the ASCII characters.
// NOTE(review): listing line 105 is missing from this rendering; it presumably declares
// the `matcher` type used on the next line - confirm against the upstream header.
106  if (matcher::template match<encoding>(reader.peek()))
107  {
108  reader.bump();
109  }
110  else if constexpr (!std::is_same_v<decltype(CharClass::char_class_match_cp(char32_t())),
111  std::false_type>)
112  {
113  if constexpr (lexy::is_unicode_encoding<encoding>)
114  {
115  static_assert(CharClass::char_class_unicode(),
116  "cannot use this character class with Unicode encoding");
117 
// Decode a full code point and accept it only if decoding succeeded and the class matches.
118  auto result = lexy::_detail::parse_code_point(reader);
119  if (result.error == lexy::_detail::cp_error::success
120  && CharClass::char_class_match_cp(result.cp))
121  {
122  reader.reset(result.end);
123  }
124  else
125  {
// Report the valid run collected so far before handling the bad input.
126  finish(context, sink, reader.position());
127 
// Guarantee progress: if decoding consumed nothing, skip a single code unit;
// otherwise skip everything the decoder consumed.
128  auto recover_begin = reader.position();
129  if (recover_begin == result.end.position())
130  reader.bump();
131  else
132  reader.reset(result.end);
133  _recover(context, recover_begin, reader.position());
134  }
135  }
136  else
137  {
138  static_assert(!CharClass::char_class_unicode(),
139  "cannot use this character class with non-Unicode char encoding");
140  LEXY_ASSERT(reader.peek() != encoding::eof(),
141  "EOF should be checked before calling this");
142 
// Non-Unicode encoding: the single code unit is converted to `char32_t` and matched as-is.
143  auto recover_begin = reader.position();
144  auto cp = static_cast<char32_t>(reader.peek());
145  reader.bump();
146 
147  if (!CharClass::char_class_match_cp(cp))
148  {
// The unit was already consumed above; close the run before it and discard it.
149  finish(context, sink, recover_begin);
150  _recover(context, recover_begin, reader.position());
151  }
152  }
153  }
154  // It doesn't match Unicode characters.
155  else
156  {
157  // We can just discard the invalid ASCII character.
158  LEXY_ASSERT(reader.peek() != encoding::eof(),
159  "EOF should be checked before calling this");
160  auto recover_begin = reader.position();
161  reader.bump();
162  auto recover_end = reader.position();
163 
164  finish(context, sink, recover_begin);
165  _recover(context, recover_begin, recover_end);
166  }
167  }
168 
// Reports the current run [begin, end) as a `CharClass::token_type` token.
// Does nothing for an empty run. `begin` is not modified by the lines visible here.
169  template <typename Context, typename Sink>
170  constexpr void finish(Context& context, Sink& sink, typename Reader::iterator end)
171  {
172  if (begin == end)
173  return;
174 
175  context.on(_ev::token{}, typename CharClass::token_type{}, begin, end);
// NOTE(review): listing line 176 is missing from this rendering. `sink` is unused in the
// visible lines, so the missing line presumably forwards the run to it - confirm upstream.
177  }
178 };
179 
// Limit policy built from a token: the delimited content is cut off with `Error` when
// `Token` matches at the current position or the input is at EOF.
// NOTE(review): listing line 181 (the `struct _del_limit` head, per the index at the end of
// this page) is missing from this rendering.
180 template <typename Token, typename Error = lexy::missing_delimiter>
182 {
// Error tag reported by `_del::_loop` when the limit is hit.
183  using error = Error;
184 
// Returns true if the limit is reached at the reader's position.
// The reader is taken by value, so the caller's reader is not advanced by the token match.
185  template <typename Reader>
186  static constexpr bool peek(Reader reader)
187  {
188  return lexy::try_match_token(Token{}, reader) || reader.peek() == Reader::encoding::eof();
189  }
190 };
// Specialization for "no limit token": only EOF ends the delimited content early.
191 template <typename Error>
192 struct _del_limit<void, Error>
193 {
// Error tag reported by `_del::_loop` when EOF is hit before the closing delimiter.
194  using error = Error;
195 
// Returns true if the reader is at EOF. The reader is taken by value and only peeked.
196  template <typename Reader>
197  static constexpr bool peek(Reader reader)
198  {
199  return reader.peek() == Reader::encoding::eof();
200  }
201 };
202 
// Rule that parses the content between delimiters: characters of class `Char`, optional
// escape sequences `Escapes...`, terminated by `Close`, with `Limit` deciding when the
// closing delimiter is considered missing.
203 template <typename Close, typename Char, typename Limit, typename... Escapes>
204 struct _del : rule_base
205 {
// `_limit` is `_del_limit<Limit>` when `Limit` is void or a token rule, otherwise `Limit`
// itself (full typedef as shown in the index at the end of this page).
// NOTE(review): the continuation on listing line 207 is missing from this rendering.
206  using _limit = std::conditional_t<std::is_void_v<Limit> || lexy::is_token_rule<Limit>,
208 
// Consumes content until the closing delimiter matches.
// Returns true once `close.try_parse` succeeded (the closing parser is left un-finished for
// the caller), or false after reporting `_limit::error` over [del_begin, current position)
// when the limit is reached first.
209  template <typename CloseParser, typename Context, typename Reader, typename Sink>
210  LEXY_PARSER_FUNC static bool _loop(CloseParser& close, Context& context, Reader& reader,
211  Sink& sink)
212  {
213  auto del_begin = reader.position();
214  _del_chars<Char, Reader> cur_chars(reader);
215  while (true)
216  {
217  // Parse as many content chars as possible.
218  // If it returns, we need to look closer at the next char.
219  cur_chars.parse_swar(reader, Close{}, Escapes{}...);
220 
221  // Check for closing delimiter.
222  if (close.try_parse(context.control_block, reader))
223  break;
224  close.cancel(context);
225 
226  // Check for missing delimiter.
227  if (_limit::peek(reader))
228  {
229  // We're done, so finish the current characters.
230  auto end = reader.position();
231  cur_chars.finish(context, sink, end);
232 
// The error range covers everything consumed since the loop started.
233  auto err = lexy::error<Reader, typename _limit::error>(del_begin, end);
234  context.on(_ev::error{}, err);
235  return false;
236  }
237 
238  // Check for escape sequences.
// The fold stops at the first escape that reports it handled the input.
239  if ((Escapes::esc_try_parse(context, reader, sink, cur_chars) || ...))
240  // We had an escape sequence, so do nothing in this iteration.
241  continue;
242 
243  // It is actually a content char, consume it.
244  cur_chars.parse_one(context, reader, sink);
245  }
246 
247  // Finish the currently active character sequence.
248  cur_chars.finish(context, sink, reader.position());
249  return true;
250  }
251 
// Parser: runs `_loop` with a sink obtained from the context's value callback, then hands
// control to the closing delimiter's `finish` with `NextParser` as continuation.
252  template <typename NextParser>
253  struct p
254  {
// Returns false if `_loop` failed; otherwise returns whatever `close.finish` returns.
// A non-void sink result is appended to `args...` as the last argument.
255  template <typename Context, typename Reader, typename... Args>
256  LEXY_PARSER_FUNC static bool parse(Context& context, Reader& reader, Args&&... args)
257  {
258  static_assert(lexy::is_char_encoding<typename Reader::encoding>);
259  auto sink = context.value_callback().sink();
260 
261  // Parse characters until we have the closing delimiter.
// NOTE(review): listing line 262 is missing from this rendering; it presumably declares
// the `close` parser object used below - confirm against the upstream header.
263  if (!_loop(close, context, reader, sink))
264  return false;
265 
266  // We're done, finish the sink and then the closing delimiter.
267  if constexpr (std::is_same_v<typename decltype(sink)::return_type, void>)
268  {
// The sink produces no value: finish it for its effects only.
269  LEXY_MOV(sink).finish();
270  return close.template finish<NextParser>(context, reader, LEXY_FWD(args)...);
271  }
272  else
273  {
274  return close.template finish<NextParser>(context, reader, LEXY_FWD(args)...,
275  LEXY_MOV(sink).finish());
276  }
277  }
278  };
279 };
280 
// Empty body of `_escape_base` (the index at the end of this page places its definition at
// listing line 281, which is missing from this rendering).
// `_delim_dsl::operator()` static_asserts that every escape argument derives from it.
282 {};
283 
// DSL object returned by `delimited()`: remembers the opening and closing delimiter types
// (and optionally a limit) and builds the actual rule via `operator()`.
// NOTE(review): the `struct _delim_dsl` head (listing line 285) and the `return` statements
// of both `limit()` overloads and of `operator()` (listing lines 293, 300, 310) are missing
// from this rendering, as are the doc-comment lines before each member.
284 template <typename Open, typename Close, typename Limit = void>
286 {
// Add char classes that will limit the delimited to detect a missing terminator.
// Only allowed while no limit is set, and only with a char class rule.
288  template <typename LimitCharClass>
289  constexpr auto limit(LimitCharClass) const
290  {
291  static_assert(std::is_void_v<Limit> && lexy::is_char_class_rule<LimitCharClass>);
292 
294  }
// Add char classes that will limit the delimited and specify the error (`Error`) to report.
// Same preconditions as the overload above.
296  template <typename Error, typename LimitCharClass>
297  constexpr auto limit(LimitCharClass) const
298  {
299  static_assert(std::is_void_v<Limit> && lexy::is_char_class_rule<LimitCharClass>);
301  }
302 
303  //=== rules ===//
// Sets the content: `Char` must be a char class rule and every escape must derive from
// `_escape_base`.
305  template <typename Char, typename... Escapes>
306  constexpr auto operator()(Char, Escapes...) const
307  {
308  static_assert(lexy::is_char_class_rule<Char>);
309  static_assert((std::is_base_of_v<_escape_base, Escapes> && ...));
311  }
312 
313  //=== access ===//
// Matches the open delimiter.
315  constexpr auto open() const
316  {
317  return Open{};
318  }
// Matches the closing delimiter.
320  constexpr auto close() const
321  {
322  // Close never has any whitespace.
323  return Close{};
324  }
325 };
326 
// Parses everything between the two delimiters and captures it.
// Both `Open` and `Close` are checked with LEXY_REQUIRE_BRANCH_RULE; the result is a
// `_delim_dsl<Open, Close>` with no limit set.
328 template <typename Open, typename Close>
329 constexpr auto delimited(Open, Close)
330 {
331  LEXY_REQUIRE_BRANCH_RULE(Open, "delimited()");
332  LEXY_REQUIRE_BRANCH_RULE(Close, "delimited()");
333  return _delim_dsl<Open, Close>{};
334 }
335 
// Single-delimiter overload: `Delim` is used as both the opening and the closing delimiter,
// i.e. the result is `_delim_dsl<Delim, Delim>`.
337 template <typename Delim>
338 constexpr auto delimited(Delim)
339 {
340  LEXY_REQUIRE_BRANCH_RULE(Delim, "delimited()");
341  return _delim_dsl<Delim, Delim>{};
342 }
343 
// Predefined delimiters. Each uses the same literal to open and to close the content.
// Double quote: " and """
344 constexpr auto quoted = delimited(LEXY_LIT("\""));
345 constexpr auto triple_quoted = delimited(LEXY_LIT("\"\"\""));
346 
// Single quote: '
347 constexpr auto single_quoted = delimited(LEXY_LIT("'"));
348 
// Backtick: `, `` and ```
349 constexpr auto backticked = delimited(LEXY_LIT("`"));
350 constexpr auto double_backticked = delimited(LEXY_LIT("``"));
351 constexpr auto triple_backticked = delimited(LEXY_LIT("```"));
351 constexpr auto triple_backticked = delimited(LEXY_LIT("```"));
352 } // namespace lexyd
353 
354 namespace lexy
355 {
// Error tag `lexy::invalid_escape_sequence`, reported by `_escape::esc_try_parse` when the
// escape token matched but none of the escape's branches did.
// NOTE(review): listing line 356 (the `struct` head, per the index at the end of this page)
// is missing from this rendering; only the body survived.
357 {
// Returns the fixed label "invalid escape sequence" for this error tag.
358  static LEXY_CONSTEVAL auto name()
359  {
360  return "invalid escape sequence";
361  }
362 };
363 } // namespace lexy
364 
365 namespace lexyd
366 {
// Escape sequence description: an `Escape` token that introduces the sequence plus the
// `Branches...` that may follow it. The builder members (`rule`, `capture`, `symbol`) each
// return a new `_escape` type with one more branch appended.
// NOTE(review): listing line 368 (the `struct _escape` head, per the index at the end of this
// page) is missing from this rendering.
367 template <typename Escape, typename... Branches>
369 {
// Lets `_del_chars::parse_swar` check whether the escape token is a literal rule.
370  static constexpr bool esc_is_literal = lexy::is_literal_rule<Escape>;
// First character of the escape literal in `Encoding`; `parse_swar` stops at a chunk
// containing it.
371  template <typename Encoding>
372  static constexpr auto esc_first_char() -> typename Encoding::char_type
373  {
374  return Escape::template lit_first_char<Encoding>();
375  }
376 
// Tries to parse one escape sequence at the reader's position.
// Returns false (without consuming anything visible here) if the escape token does not
// match. Otherwise returns true: the pending character run is finished, the token is
// consumed, the first matching branch is parsed into `sink`, and `cur_chars.begin` is
// moved past the sequence. If no branch matches, `lexy::invalid_escape_sequence` is
// reported and the function still returns true.
377  template <typename Context, typename Reader, typename Sink, typename Char>
378  static constexpr bool esc_try_parse(Context& context, Reader& reader, Sink& sink,
379  _del_chars<Char, Reader>& cur_chars)
380  {
381  auto begin = reader.position();
382 
383  // Check whether we're having the initial escape character.
// NOTE(review): listing line 384 is missing from this rendering; it presumably declares
// the `token` parser object used below - confirm against the upstream header.
385  if (!token.try_parse(context.control_block, reader))
386  // No need to call `.cancel()`; it's a token.
387  return false;
388 
389  // We do, so finish current character sequence and consume the escape token.
390  cur_chars.finish(context, sink, begin);
391  // It's a token, so this can't fail.
392  token.template finish<lexy::pattern_parser<>>(context, reader);
393 
394  // Try to parse the correct branch.
395  auto try_parse_branch = [&](auto branch) {
396  lexy::branch_parser_for<decltype(branch), Reader> parser{};
397  if (!parser.try_parse(context.control_block, reader))
398  {
399  parser.cancel(context);
400  return false;
401  }
402 
403  // This might fail, but we don't care:
404  // it will definitely consume the escape token, and everything that is a valid prefix.
405  // The remaining stuff is then just treated as part of the delimited.
406  parser.template finish<lexy::sink_parser>(context, reader, sink);
407  return true;
408  };
// Branches are tried in declaration order; the fold stops at the first one that matches.
409  auto found = (try_parse_branch(Branches{}) || ...);
410 
411  if constexpr ((lexy::is_unconditional_branch_rule<Branches> || ...))
412  {
413  LEXY_ASSERT(found, "there is an unconditional branch");
414  }
415  else if (!found)
416  {
417  // We haven't found any branch of the escape sequence.
// The error range starts at the escape token and ends at the current position.
418  auto err = lexy::error<Reader, lexy::invalid_escape_sequence>(begin, reader.position());
419  context.on(_ev::error{}, err);
420  }
421 
422  // Restart the current character sequence after the escape sequence.
423  cur_chars.begin = reader.position();
424  return true;
425  }
426 
// Adds a generic escape rule: returns an `_escape` with `Branch` appended to the branches.
428  template <typename Branch>
429  constexpr auto rule(Branch) const
430  {
431  LEXY_REQUIRE_BRANCH_RULE(Branch, "escape()");
432  return _escape<Escape, Branches..., Branch>{};
433  }
434 
// Adds an escape rule that captures the branch (wraps it in `lexy::dsl::capture`).
436  template <typename Branch>
437  constexpr auto capture(Branch branch) const
438  {
439  LEXY_REQUIRE_BRANCH_RULE(Branch, "escape()");
440  return this->rule(lexy::dsl::capture(branch));
441  }
442 
// Adds an escape rule that parses the symbol from `Table`, using `rule` as its argument.
444  template <const auto& Table, typename Rule>
445  constexpr auto symbol(Rule rule) const
446  {
447  return this->rule(lexyd::symbol<Table>(rule));
448  }
// Same as above, but passes `lexyd::symbol<Table>` itself without an explicit rule.
449  template <const auto& Table>
450  constexpr auto symbol() const
451  {
452  return this->rule(lexyd::symbol<Table>);
453  }
454 };
455 
// Creates an escape description that is introduced by `EscapeToken` and has no branches yet;
// branches are added with `.rule()`, `.capture()` or `.symbol()` on the result.
// `EscapeToken` must be a token rule (enforced by the static_assert).
459 template <typename EscapeToken>
460 constexpr auto escape(EscapeToken)
461 {
462  static_assert(lexy::is_token_rule<EscapeToken>);
463  return _escape<EscapeToken>{};
464 }
465 
// Predefined escapes introduced by a single backslash or a single dollar sign.
466 constexpr auto backslash_escape = escape(lit_c<'\\'>);
467 constexpr auto dollar_escape = escape(lit_c<'$'>);
468 } // namespace lexyd
469 
470 #endif // LEXY_DSL_DELIMITED_HPP_INCLUDED
471 
lexyd::_delim_dsl::operator()
constexpr auto operator()(Char, Escapes...) const
Sets the content.
Definition: delimited.hpp:306
lexyd::position
constexpr auto position
Produces an iterator to the current reader position without parsing anything.
Definition: position.hpp:79
LEXY_MOV
#define LEXY_MOV(...)
Definition: config.hpp:29
lexy::invalid_escape_sequence::name
static LEXY_CONSTEVAL auto name()
Definition: delimited.hpp:358
LEXY_CONSTEVAL
#define LEXY_CONSTEVAL
Definition: config.hpp:98
lexyd::token
constexpr auto token(Rule)
Turns the arbitrary rule into a token by matching it without producing any values.
Definition: dsl/token.hpp:215
lexy::parse_events::recovery_start
Definition: dsl/base.hpp:74
lexyd::_delim_dsl::limit
constexpr auto limit(LimitCharClass) const
Add char classes that will limit the delimited and specify the error.
Definition: delimited.hpp:297
lexyd::peek
constexpr auto peek(Rule)
Definition: peek.hpp:166
lexy::_detail::ascii_set_matcher
Definition: char_class.hpp:139
lexyd::quoted
constexpr auto quoted
Definition: delimited.hpp:344
lexyd::dollar_escape
constexpr auto dollar_escape
Definition: delimited.hpp:467
symbol.hpp
literal.hpp
LEXY_LIT
#define LEXY_LIT(Str)
Definition: literal.hpp:392
lexyd::escape
constexpr auto escape(EscapeToken)
Definition: delimited.hpp:460
magic_enum::char_type
string_view::value_type char_type
Definition: magic_enum.hpp:145
lexyd::delimited
constexpr auto delimited(Open, Close)
Parses everything between the two delimiters and captures it.
Definition: delimited.hpp:329
lexyd::_escape::capture
constexpr auto capture(Branch branch) const
Adds an escape rule that captures the branch.
Definition: delimited.hpp:437
lexy::_detail::parse_code_point
constexpr cp_result< Reader > parse_code_point(Reader reader)
Definition: _detail/code_point.hpp:142
lexy::branch_parser_for
typename BranchRule::template bp< Reader > branch_parser_for
Definition: dsl/base.hpp:116
lexyd::_del_chars::_del_chars
constexpr _del_chars(const Reader &reader)
Definition: delimited.hpp:34
lexyd::_delim_dsl::limit
constexpr auto limit(LimitCharClass) const
Add char classes that will limit the delimited to detect a missing terminator.
Definition: delimited.hpp:289
LEXY_FWD
#define LEXY_FWD(...)
Definition: config.hpp:30
lexyd::no_whitespace
constexpr auto no_whitespace(Rule)
Disables automatic skipping of whitespace for all tokens of the given rule.
Definition: whitespace.hpp:309
lexyd::_del::_loop
static LEXY_PARSER_FUNC bool _loop(CloseParser &close, Context &context, Reader &reader, Sink &sink)
Definition: delimited.hpp:210
lexyd::_escape::rule
constexpr auto rule(Branch) const
Adds a generic escape rule.
Definition: delimited.hpp:429
lexy
Definition: any_ref.hpp:12
lexyd::_del_limit::error
Error error
Definition: delimited.hpp:183
cx::end
constexpr auto end(const C &c) -> decltype(c.end())
Definition: wildcards.hpp:686
char_class.hpp
detail::void
j template void())
Definition: json.hpp:4893
LEXY_REQUIRE_BRANCH_RULE
#define LEXY_REQUIRE_BRANCH_RULE(Rule, Name)
Definition: grammar.hpp:73
lexy::_detail::swar_has_char
constexpr bool swar_has_char(swar_int v)
Definition: swar.hpp:177
lexyd::_del_limit< void, Error >::error
Error error
Definition: delimited.hpp:194
lexyd::_del_chars
Definition: delimited.hpp:30
lexy::error
Generic failure.
Definition: error.hpp:14
lexyd::_escape::esc_try_parse
static constexpr bool esc_try_parse(Context &context, Reader &reader, Sink &sink, _del_chars< Char, Reader > &cur_chars)
Definition: delimited.hpp:378
lexyd::_escape::symbol
constexpr auto symbol(Rule rule) const
Adds an escape rule that parses the symbol.
Definition: delimited.hpp:445
lexyd::single_quoted
constexpr auto single_quoted
Definition: delimited.hpp:347
lexyd::_del::p
Definition: delimited.hpp:253
lexyd::_del_chars::parse_swar
constexpr void parse_swar(Reader &reader, Close, Escs...)
Definition: delimited.hpp:53
lexyd::_del_limit
Definition: delimited.hpp:181
lexyd::_delim_dsl::close
constexpr auto close() const
Matches the closing delimiter.
Definition: delimited.hpp:320
swar.hpp
lexyd::_del_chars::finish
constexpr void finish(Context &context, Sink &sink, typename Reader::iterator end)
Definition: delimited.hpp:170
capture.hpp
lexy::parse_events::error
Definition: dsl/base.hpp:68
lexy::try_match_token
constexpr LEXY_FORCE_INLINE auto try_match_token(TokenRule, Reader &reader)
Definition: dsl/base.hpp:245
lexyd::_escape::esc_is_literal
static constexpr bool esc_is_literal
Definition: delimited.hpp:370
lexyd::_del_chars::begin
Reader::iterator begin
Definition: delimited.hpp:32
lexyd::_del_limit< void, Error >::peek
static constexpr bool peek(Reader reader)
Definition: delimited.hpp:197
lexyd::_delim_dsl
Definition: delimited.hpp:285
lexyd::_escape::esc_first_char
static constexpr auto esc_first_char() -> typename Encoding::char_type
Definition: delimited.hpp:372
lexyd::rule_base
Definition: grammar.hpp:17
lexy::missing_delimiter::name
static LEXY_CONSTEVAL auto name()
Definition: delimited.hpp:20
lexy::error_token_kind
@ error_token_kind
Definition: grammar.hpp:86
lexyd::backslash_escape
constexpr auto backslash_escape
Definition: delimited.hpp:466
lexy::parse_events::recovery_finish
Definition: dsl/base.hpp:79
lexyd::triple_backticked
constexpr auto triple_backticked
Definition: delimited.hpp:351
LEXY_PARSER_FUNC
#define LEXY_PARSER_FUNC
Definition: dsl/base.hpp:108
cx::begin
constexpr auto begin(const C &c) -> decltype(c.begin())
Definition: wildcards.hpp:661
lexyd::backticked
constexpr auto backticked
Definition: delimited.hpp:349
whitespace.hpp
base.hpp
lexyd::double_backticked
constexpr auto double_backticked
Definition: delimited.hpp:350
lexyd::_del::_limit
std::conditional_t< std::is_void_v< Limit >||lexy::is_token_rule< Limit >, _del_limit< Limit >, Limit > _limit
Definition: delimited.hpp:207
lexyd::_del::p::parse
static LEXY_PARSER_FUNC bool parse(Context &context, Reader &reader, Args &&... args)
Definition: delimited.hpp:256
lexy::parse_events::token
Definition: dsl/base.hpp:57
lexy::lexeme
Definition: lexeme.hpp:16
lexy::_detail::cp_error::success
@ success
lexyd::_del_limit::peek
static constexpr bool peek(Reader reader)
Definition: delimited.hpp:186
lexyd::_del
Definition: delimited.hpp:204
lexyd::_escape::symbol
constexpr auto symbol() const
Definition: delimited.hpp:450
lexyd::_del_chars::_recover
constexpr void _recover(Context &context, typename Reader::iterator recover_begin, typename Reader::iterator recover_end)
Definition: delimited.hpp:37
lexyd
Definition: trace.hpp:22
lexyd::triple_quoted
constexpr auto triple_quoted
Definition: delimited.hpp:345
lexyd::capture
constexpr auto capture(Token)
Captures whatever the token matches as a lexeme; does not include trailing whitespace.
Definition: capture.hpp:127
lexyd::eof
constexpr auto eof
Matches EOF.
Definition: eof.hpp:72
LEXY_ASSERT
#define LEXY_ASSERT(Expr, Msg)
Definition: assert.hpp:37
lexy::missing_delimiter
The reader ends before the closing delimiter was found.
Definition: delimited.hpp:18
lexyd::_escape
Definition: delimited.hpp:368
lexy::invalid_escape_sequence
Definition: delimited.hpp:356
lexyd::_escape_base
Definition: delimited.hpp:281
lexyd::_delim_dsl::open
constexpr auto open() const
Matches the open delimiter.
Definition: delimited.hpp:315
lexyd::_del_chars::parse_one
constexpr void parse_one(Context &context, Reader &reader, Sink &sink)
Definition: delimited.hpp:100


behaviortree_cpp_v4
Author(s): Davide Faconti
autogenerated on Fri Dec 13 2024 03:19:16